From ec24393a0af735507e30342deafa1ad42797c058 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Sat, 22 Aug 2020 12:52:56 +0100 Subject: [PATCH 01/58] Add functionality to support free ESXI, (using https://github.com/dseeley/esxifree_guest). Add redeploy scheme (_scheme_rmvm_keepdisk_only), which supports copying or moving the disks from previous cluster member to new member (only support esxi-free to date). --- EXAMPLE/Pipfile | 1 + EXAMPLE/cluster.yml | 17 +++-- EXAMPLE/group_vars/_skel/cluster_vars.yml | 45 ++++++++++-- .../group_vars/test_aws_euw1/cluster_vars.yml | 12 ++-- .../group_vars/test_gcp_euw1/cluster_vars.yml | 10 +-- _dependencies/filter_plugins/custom.py | 9 ++- clean/tasks/clean_vms.yml | 24 +++++++ .../tasks/get_cluster_hosts_state.yml | 31 +++++---- .../tasks/get_cluster_hosts_target.yml | 15 +++- config/tasks/disks_auto.yml | 3 +- config/tasks/main.yml | 4 +- config/tasks/pkgupdate.yml | 16 ++--- create/tasks/esxifree.yml | 39 +++++++++++ create/tasks/main.yml | 9 +-- dynamic_inventory/tasks/esxifree.yml | 45 ++++++++++++ dynamic_inventory/tasks/main.yml | 16 ++--- redeploy/__common/tasks/poweroff_vms.yml | 24 +++++++ redeploy/__common/tasks/poweron_vms.yml | 12 ++++ .../tasks/set_lifecycle_state_label.yml | 12 ++++ .../tasks/_add_diskinfo_esxifree.yml | 36 ++++++++++ .../tasks/by_hosttype.yml | 23 +++++++ .../tasks/by_hosttype_by_host.yml | 51 ++++++++++++++ .../_scheme_rmvm_keepdisk_only/tasks/main.yml | 69 +++++++++++++++++++ redeploy/tasks/main.yml | 3 +- 24 files changed, 455 insertions(+), 71 deletions(-) create mode 100644 create/tasks/esxifree.yml create mode 100644 dynamic_inventory/tasks/esxifree.yml create mode 100644 redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_diskinfo_esxifree.yml create mode 100644 redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype.yml create mode 100644 redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype_by_host.yml create mode 100644 redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml diff --git a/EXAMPLE/Pipfile b/EXAMPLE/Pipfile index bed02bb4..8a16c139 100644 --- a/EXAMPLE/Pipfile +++ b/EXAMPLE/Pipfile @@ -14,6 +14,7 @@ jmespath = "*" dnspython = "*" google-auth = "*" google-api-python-client = "*" +paramiko = "*" [dev-packages] diff --git a/EXAMPLE/cluster.yml b/EXAMPLE/cluster.yml index 701d7c7c..94af2e14 100644 --- a/EXAMPLE/cluster.yml +++ b/EXAMPLE/cluster.yml @@ -3,22 +3,25 @@ - name: Deploy the cluster hosts: localhost connection: local - roles: - - { role: clusterverse/clean, tags: [clusterverse_clean], when: clean is defined } - - { role: clusterverse/create, tags: [clusterverse_create] } - - { role: clusterverse/dynamic_inventory, tags: [clusterverse_dynamic_inventory] } + tasks: + - { import_role: { name: clusterverse/clean }, tags: [clusterverse_clean], when: clean is defined } # Alternative include_role (need to force the tags): - { include_role: { name: clusterverse/clean, apply: {tags: [clusterverse_clean]}}, tags: [clusterverse_clean], when: clean is defined } + - { import_role: { name: clusterverse/create }, tags: [clusterverse_create] } + - { import_role: { name: clusterverse/dynamic_inventory }, tags: [clusterverse_dynamic_inventory] } - name: Configure the cluster hosts: all - roles: [ { role: clusterverse/config, tags: [clusterverse_config] } ] + tasks: + - { import_role: { name: clusterverse/config }, tags: [clusterverse_config] } ## Application roles - name: Test application role hosts: all - roles: [ { role: testrole, tags: [testrole] } ] + tasks: + - { import_role: { name: "testrole" }, tags: 
[testrole] } ## - name: Perform cluster readiness operations hosts: localhost connection: local - roles: [ { role: clusterverse/readiness, tags: [clusterverse_readiness] } ] + tasks: + - { import_role: { name: clusterverse/readiness }, tags: [clusterverse_readiness] } diff --git a/EXAMPLE/group_vars/_skel/cluster_vars.yml b/EXAMPLE/group_vars/_skel/cluster_vars.yml index 1aec269f..92498340 100644 --- a/EXAMPLE/group_vars/_skel/cluster_vars.yml +++ b/EXAMPLE/group_vars/_skel/cluster_vars.yml @@ -1,5 +1,7 @@ --- +redeploy_schemes_supported: [] + # GCP credentials gcp_credentials_file: "{{ lookup('env','GCP_CREDENTIALS') | default('/dev/null', true) }}" gcp_credentials_json: "{{ lookup('file', gcp_credentials_file) | default({'project_id': 'GCP_CREDENTIALS__NOT_SET','client_email': 'GCP_CREDENTIALS__NOT_SET'}, true) }}" @@ -35,10 +37,7 @@ cloud_agent: ## Bind configuration and credentials, per environment bind9: - sandbox: - server: - key_name: - key_secret: + sandbox: {server: "", key_name: "", key_secret: ""} cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within the cloud environment @@ -131,3 +130,41 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within #_region: *region #_ssh_guard_whitelist: *ssh_guard_whitelist #_dns_nameserver_zone: *dns_nameserver_zone + +### ESXi-free example +#cluster_vars: +# type: &cloud_type "esxifree" +# image: "gold-ubuntu2004-20200411145623" +# esxi_ip: "10.189.132.4" +# username: "svc" +# password: "" +# dns_cloud_internal_domain: "" # The cloud-internal zone as defined by the cloud provider (e.g. GCP, AWS) +# dns_nameserver_zone: &dns_nameserver_zone "" # The zone that dns_server will operate on. gcloud dns needs a trailing '.'. Leave blank if no external DNS (use IPs only) +# dns_user_domain: "{%- if _dns_nameserver_zone -%}{{_dns_nameserver_zone}}{%- endif -%}" +# dns_server: "" # Specify DNS server. nsupdate, route53 or clouddns. If empty string is specified, no DNS will be added. 
+# custom_tagslabels: +# inv_environment_id: "{{buildenv}}" +# inv_service_id: "{{app_class}}" +# inv_cluster_id: "{{cluster_name}}" +# inv_cluster_type: "{{app_name}}" +# datastore: "datastore1" +# hardware_version: "15" +# cloudinit_userdata: +# - name: user1 +# groups: "admin" +# lock_passwd: true +# ssh_authorized_keys: ['ssh-rsa AAAzaC1yc2EAAAADAQ...dojtl6mzVnSL29LQ=='] +# passwd: $6$j322wezy...m2RrkJPfghBZMN1O/ +# sandbox: +# networks: +# - networkName: "VM Network" +# virtualDev: vmxnet3 +# #macAddress: "00:0c:29:be:51:d0" #dev01 +# cloudinit_netplan: +# ethernets: {eth0: { dhcp4: true }, nameservers: { addresses: ["8.8.8.8", "8.8.4.4"] } } +# hosttype_vars: +# sys: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: {num_cpus: "2", memory_mb: "2048"}, version: "{{sys_version | default('')}}", auto_volumes: []} +# #sysdisks: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: {num_cpus: "2", memory_mb: "2048"}, version: "{{sys_version | default('')}}", auto_volumes: [{mountpoint: "/media/mysvc", volume_size: 2, provisioning_type: "thin", fstype: "ext4"}]} +#_cloud_type: *cloud_type +#_dns_nameserver_zone: *dns_nameserver_zone +# \ No newline at end of file diff --git a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml index e37fda24..03d31890 100644 --- a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml @@ -1,8 +1,11 @@ --- -redeploy_scheme: _scheme_addallnew_rmdisk_rollback +redeploy_schemes_supported: ['_scheme_addallnew_rmdisk_rollback', '_scheme_addnewvm_rmdisk_rollback', '_scheme_rmvm_rmdisk_only'] + +#redeploy_scheme: _scheme_addallnew_rmdisk_rollback #redeploy_scheme: _scheme_addnewvm_rmdisk_rollback -#redeploy_scheme: _scheme_rmvm_rmdisks_only +#redeploy_scheme: _scheme_rmvm_rmdisk_only +#redeploy_scheme: _scheme_rmvm_keepdisk_only app_name: "test" # The name of the application cluster (e.g. 'couchbase', 'nginx'); becomes part of cluster_name. app_class: "test" # The class of application (e.g. 'database', 'webserver'); becomes part of the fqdn @@ -35,10 +38,7 @@ cloud_agent: ## Bind configuration and credentials, per environment bind9: - sandbox: - server: - key_name: - key_secret: + sandbox: {server: "", key_name: "", key_secret: ""} cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within the cloud environment diff --git a/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml index c5a01de0..9e704425 100644 --- a/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml @@ -4,9 +4,12 @@ gcp_credentials_file: "{{ lookup('env','GCP_CREDENTIALS') | default('/dev/null', true) }}" gcp_credentials_json: "{{ lookup('file', gcp_credentials_file) | default({'project_id': 'GCP_CREDENTIALS__NOT_SET','client_email': 'GCP_CREDENTIALS__NOT_SET'}, true) }}" +redeploy_schemes_supported: ['_scheme_addallnew_rmdisk_rollback', '_scheme_addnewvm_rmdisk_rollback', '_scheme_rmvm_rmdisk_only'] + redeploy_scheme: _scheme_addallnew_rmdisk_rollback #redeploy_scheme: _scheme_addnewvm_rmdisk_rollback -#redeploy_scheme: _scheme_rmvm_rmdisks_only +#redeploy_scheme: _scheme_rmvm_rmdisk_only +#redeploy_scheme: _scheme_rmvm_keepdisk_only app_name: "test" # The name of the application cluster (e.g. 'couchbase', 'nginx'); becomes part of cluster_name. app_class: "test" # The class of application (e.g. 
'database', 'webserver'); becomes part of the fqdn @@ -39,10 +42,7 @@ cloud_agent: ## Bind configuration and credentials, per environment bind9: - sandbox: - server: - key_name: - key_secret: + sandbox: {server: "", key_name: "", key_secret: ""} cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within the cloud environment diff --git a/_dependencies/filter_plugins/custom.py b/_dependencies/filter_plugins/custom.py index 1b27c7ba..0796adaa 100644 --- a/_dependencies/filter_plugins/custom.py +++ b/_dependencies/filter_plugins/custom.py @@ -41,14 +41,18 @@ def iplookup(fqdn): def json_loads_loose(inStr): import re, json - display.vv(u"json_loads_loose - input type: %s" % type(inStr)) + display.vvv(u"json_loads_loose - input type: %s" % type(inStr)) if type(inStr) is dict or type(inStr) is list: json_object = json.loads((str(json.dumps(inStr))).encode('utf-8')) else: try: json_object = json.loads(inStr) except (ValueError, AttributeError) as e: - return json.loads(str(re.sub(r'\'(.*?)\'([,:}])', r'"\1"\2', inStr).replace(': True', ': "True"').replace(': False', ': "False"')).encode('utf-8')) + try: + json_object = json.loads(str(re.sub(r'\'(.*?)\'([,:}])', r'"\1"\2', inStr).replace(': True', ': "True"').replace(': False', ': "False"')).encode('utf-8')) + except (ValueError, AttributeError) as e: + display.v(u"json_loads_loose - WARNING: could not parse attribute string as json: %s" % inStr) + return inStr return json_object @@ -57,6 +61,5 @@ def filters(self): return { 'dict_agg': dict_agg, 'iplookup': iplookup, - 'json_loads_loose': json_loads_loose } diff --git a/clean/tasks/clean_vms.yml b/clean/tasks/clean_vms.yml index 8bc68497..aa94f1ce 100644 --- a/clean/tasks/clean_vms.yml +++ b/clean/tasks/clean_vms.yml @@ -62,4 +62,28 @@ retries: 300 with_items: "{{r__gcp_compute_instance.results}}" when: cluster_vars.type == "gcp" + + - block: + - name: clean/del_vms/esxifree | Delete vmware VM + esxifree_guest: + hostname: "{{ cluster_vars.esxi_ip }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + name: "{{item.name}}" + state: absent + register: esxi_instances + run_once: true + with_items: "{{hosts_to_clean}}" + async: 7200 + poll: 0 + + - name: clean_vms_esxifree | Wait for esxifree VM deletion to complete + async_status: + jid: "{{ item.ansible_job_id }}" + register: esxi_jobs + until: esxi_jobs.finished + retries: 300 + with_items: "{{esxi_instances.results}}" + when: cluster_vars.type == "esxifree" + when: hosts_to_clean | length \ No newline at end of file diff --git a/cluster_hosts/tasks/get_cluster_hosts_state.yml b/cluster_hosts/tasks/get_cluster_hosts_state.yml index 3571111a..7d50dafa 100644 --- a/cluster_hosts/tasks/get_cluster_hosts_state.yml +++ b/cluster_hosts/tasks/get_cluster_hosts_state.yml @@ -47,22 +47,22 @@ _cluster_hosts_state__urlregion: "{{r__gcp_compute_instance_info.results | json_query(\"[?resources[?labels]].resources[].{name: name, regionzone: zone, tagslabels: labels, instance_id: id, instance_state: status}\") }}" when: cluster_vars.type == "gcp" -- name: get_cluster_hosts_state/vmware | Get VMware cluster_hosts_state +- name: get_cluster_hosts_state_esxifree | Get VMware cluster_hosts_state block: - - name: get_cluster_hosts_state/vmware | Get existing VMware instance info + - name: get_cluster_hosts_state_esxifree | Get existing VMware instance info vmware_vm_info: - username: "{{ cluster_vars.esxi_username }}" - password: "{{ cluster_vars.esxi_password }}" + username: "{{ cluster_vars.username }}" + password: 
"{{ cluster_vars.password }}" hostname: "{{ cluster_vars.esxi_ip }}" validate_certs: no register: r__vmware_vm_info delegate_to: localhost run_once: true - - name: get_cluster_hosts_state/vmware | Get existing VMware instance facts + - name: get_cluster_hosts_state_esxifree | Get existing VMware instance facts vmware_guest_info: - username: "{{ cluster_vars.esxi_username }}" - password: "{{ cluster_vars.esxi_password }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" hostname: "{{ cluster_vars.esxi_ip }}" validate_certs: no datacenter: None @@ -72,21 +72,28 @@ delegate_to: localhost run_once: true - # Convert the annotations into a proper dictionary within the facts - - name: get_cluster_hosts_state/vmware | update r__vmware_guest_info result with json-parsed annotations + ## esxifree hosts must use the esxi 'annotations' field as json. They are stored as unconventional text in the vmx file, so must + ## be converted into inline-json within the facts. If the annotation field is not convertible to json, then we don't consider this VM part of the cluster. + - name: get_cluster_hosts_state_esxifree | update r__vmware_guest_info result with json-parsed annotations set_fact: r__vmware_guest_info: | - {% set res = r__vmware_guest_info -%} + {% set res = {'results': []} -%} {%- for result in r__vmware_guest_info.results -%} - {%- set _ = result.instance.update({'annotation': result.instance.annotation | json_loads_loose}) -%} + {%- set loadloose_res = result.instance.annotation | json_loads_loose -%} + {%- if loadloose_res | type_debug == 'dict' or loadloose_res | type_debug == 'list' -%} + {%- set _ = result.instance.update({'annotation': loadloose_res}) -%} + {%- set _ = res.results.append(result) -%} + {%- endif -%} {%- endfor -%} {{ res }} - - name: get_cluster_hosts_state/vmware | Set cluster_hosts_state + - name: get_cluster_hosts_state_esxifree | Set cluster_hosts_state set_fact: cluster_hosts_state: "{{ r__vmware_guest_info.results | json_query(\"[].{name: instance.hw_name, regionzone: None, tagslabels: instance.annotation, instance_id: instance.moid, instance_state: instance.hw_power_status}\") }}" + when: cluster_vars.type == "esxifree" + - name: get_cluster_hosts_state | cluster_hosts_state debug: msg="{{cluster_hosts_state}}" delegate_to: localhost diff --git a/cluster_hosts/tasks/get_cluster_hosts_target.yml b/cluster_hosts/tasks/get_cluster_hosts_target.yml index cb113500..4dad737e 100644 --- a/cluster_hosts/tasks/get_cluster_hosts_target.yml +++ b/cluster_hosts/tasks/get_cluster_hosts_target.yml @@ -101,7 +101,7 @@ when: cluster_vars.type == "aws" -- name: get_cluster_hosts_target/gcp | GCP-specific modifications to cluster_hosts_target - add rootvol size +- name: get_cluster_hosts_target/gcp | GCP-specific modifications to cluster_hosts_target block: - name: get_cluster_hosts_target/gcp | Update cluster_hosts_target with rootvol_size set_fact: @@ -113,6 +113,19 @@ {{ res }} when: cluster_vars.type == "gcp" +- name: get_cluster_hosts_target/esxifree | esxifree-specific modifications to cluster_hosts_target + block: + - name: get_cluster_hosts_target/esxifree | Update cluster_hosts_target with volname (derived from the mountpoint) + set_fact: + cluster_hosts_target: | + {%- for host in cluster_hosts_target -%} + {%- for hostvol in host.auto_volumes -%} + {%- set _dummy = hostvol.update({'volname': hostvol.mountpoint | regex_replace('.*\/(.*)', '\\1')}) -%} + {%- endfor %} + {%- endfor %} + {{ cluster_hosts_target }} + when: cluster_vars.type == 
"esxifree" + - name: get_cluster_hosts_target | cluster_hosts_target debug: msg={{cluster_hosts_target}} delegate_to: localhost diff --git a/config/tasks/disks_auto.yml b/config/tasks/disks_auto.yml index 4ce7db03..ad3715c4 100644 --- a/config/tasks/disks_auto.yml +++ b/config/tasks/disks_auto.yml @@ -21,7 +21,8 @@ hostvols: | {% set res = [] -%} {% set tmp_blkvols = lsblk_volumes -%} - {%- for autovol in cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].auto_volumes -%} + {% set inventory_hostname__no_suffix = inventory_hostname | regex_replace('-(?!.*-).*') -%} + {%- for autovol in cluster_hosts_target | to_json | from_json | json_query('[?starts_with(hostname, \'' + inventory_hostname__no_suffix + '\')].auto_volumes[]') -%} {%- set blkvolloop = namespace(break=false) -%} {%- for blkvol in tmp_blkvols if not blkvolloop.break -%} {%- if (autovol.volume_size*1073741824|int) == (blkvol.size_b|int) -%} diff --git a/config/tasks/main.yml b/config/tasks/main.yml index 7de693b4..e842014a 100644 --- a/config/tasks/main.yml +++ b/config/tasks/main.yml @@ -80,9 +80,9 @@ include_tasks: cloud_agents.yml when: (cloud_agent is defined and cloud_agent) -- name: Update packages (when do_package_upgrade is defined) +- name: Update packages (when pkgupdate is defined) include_tasks: pkgupdate.yml - when: do_package_upgrade is defined and do_package_upgrade|bool + when: pkgupdate is defined and (pkgupdate == 'always' or (pkgupdate == 'onCreate' and inventory_hostname in (hostvars['localhost'].cluster_hosts_created | json_query('[].hostname')))) - name: Set hostname (e.g. AWS doesn't set it automatically) become: true diff --git a/config/tasks/pkgupdate.yml b/config/tasks/pkgupdate.yml index aa5600e8..e00ea510 100644 --- a/config/tasks/pkgupdate.yml +++ b/config/tasks/pkgupdate.yml @@ -7,6 +7,7 @@ - name: install aptitude (needed for the Debian upgrade) become: true apt: + update_cache: yes name: aptitude - name: run apt upgrade become: true @@ -24,17 +25,8 @@ register: yum_packages_update when: ansible_os_family == 'RedHat' - - block: - - name: reboot and wait (if reboot_on_package_upgrade==true) - become: yes - shell: sleep 2 && /sbin/shutdown -r now "Ansible system package upgraded" && sleep 5 - async: 1 - poll: 0 - - - set_fact: wait_host="{{ ansible_host }}" - - - name: Wait for server to come back - local_action: wait_for host={{wait_host}} delay=10 port=22 state=started - become: false + - name: reboot and wait (if reboot_on_package_upgrade==true) + become: yes + reboot: when: (reboot_on_package_upgrade is defined and reboot_on_package_upgrade|bool) and (apt_packages_update.changed or yum_packages_update.changed) when: pkgupdate is defined and (pkgupdate == 'always' or (pkgupdate == 'onCreate' and inventory_hostname in (hostvars['localhost'].cluster_hosts_created | json_query('[].hostname')))) diff --git a/create/tasks/esxifree.yml b/create/tasks/esxifree.yml new file mode 100644 index 00000000..4b6f2f4e --- /dev/null +++ b/create/tasks/esxifree.yml @@ -0,0 +1,39 @@ +--- + +- name: Create vmware instances from template + esxifree_guest: + hostname: "{{ cluster_vars.esxi_ip }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + datastore: "{{ cluster_vars.datastore }}" + template: "{{ cluster_vars.image }}" + name: "{{ item.hostname }}" + state: present + hardware: "{{ {'version': cluster_vars.hardware_version} | combine({'num_cpus': item.flavor['num_cpus'], 'memory_mb': item.flavor['memory_mb']}) }}" + annotation: + Name: 
"{{item.hostname}}" + hosttype: "{{item.hosttype}}" + env: "{{buildenv}}" + cluster_name: "{{cluster_name}}" + owner: "{{lookup('env','USER')}}" + cluster_suffix: "{{cluster_suffix}}" + lifecycle_state: "current" + cloudinit_userdata: "{{ cluster_vars.cloudinit_userdata | default([]) }}" + disks: "{{ item.auto_volumes | json_query(\"[].{size_gb: volume_size, type: provisioning_type, volname: volname, src: src }\") | default([]) }}" + networks: "{{ cluster_vars[buildenv].networks | default([]) }}" + wait: true + register: esxi_instances + run_once: true + with_items: "{{ cluster_hosts_target }}" + async: 7200 + poll: 0 + +- name: Wait for instance creation to complete + async_status: + jid: "{{ item.ansible_job_id }}" + register: esxi_jobs + until: esxi_jobs.finished + retries: 300 + with_items: "{{ esxi_instances.results }}" + +#- debug: msg={{esxi_jobs.results}} diff --git a/create/tasks/main.yml b/create/tasks/main.yml index ed828dbe..d6e95706 100644 --- a/create/tasks/main.yml +++ b/create/tasks/main.yml @@ -21,10 +21,5 @@ current_release_versions: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current' && tagslabels.release].tagslabels.release\") | default([]) }}" -- name: Create AWS cluster - include_tasks: aws.yml - when: cluster_vars.type == "aws" - -- name: Create GCP cluster - include_tasks: gcp.yml - when: cluster_vars.type == "gcp" +- name: "Create {{cluster_vars.type}} cluster" + include_tasks: "{{cluster_vars.type}}.yml" diff --git a/dynamic_inventory/tasks/esxifree.yml b/dynamic_inventory/tasks/esxifree.yml new file mode 100644 index 00000000..a7e0d4fb --- /dev/null +++ b/dynamic_inventory/tasks/esxifree.yml @@ -0,0 +1,45 @@ +--- + +- name: dynamic_inventory/esxifree | Get existing VMware instance info + vmware_vm_info: + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + hostname: "{{ cluster_vars.esxi_ip }}" + validate_certs: no + register: r__vmware_vm_info + delegate_to: localhost + run_once: true + +- name: dynamic_inventory/esxifree | Get existing VMware instance facts + vmware_guest_info: + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + hostname: "{{ cluster_vars.esxi_ip }}" + validate_certs: no + datacenter: None + uuid: "{{item.uuid}}" + with_items: "{{ r__vmware_vm_info.virtual_machines | to_json | from_json | json_query(\"[?starts_with(guest_name, '\"+cluster_name+\"') && power_state=='poweredOn']\") }}" + register: r__vmware_guest_info + delegate_to: localhost + run_once: true + +## esxifree hosts must use the esxi 'annotations' field as json. They are stored as unconventional text in the vmx file, so must +## be converted into inline-json within the facts. If the annotation field is not convertible to json, then we don't consider this VM part of the cluster. 
+- name: dynamic_inventory/esxifree | Update r__vmware_guest_info result with json-parsed annotations + set_fact: + r__vmware_guest_info: | + {% set res = {'results': []} -%} + {%- for result in r__vmware_guest_info.results -%} + {%- set loadloose_res = result.instance.annotation | json_loads_loose -%} + {%- if loadloose_res | type_debug == 'dict' or loadloose_res | type_debug == 'list' -%} + {%- set _ = result.instance.update({'annotation': loadloose_res}) -%} + {%- set _ = res.results.append(result) -%} + {%- endif -%} + {%- endfor -%} + {{ res }} + +#- debug: msg={{r__vmware_guest_info}} + +- name: dynamic_inventory/esxifree | Set dynamic_inventory_flat + set_fact: + dynamic_inventory_flat: "{{ r__vmware_guest_info.results | json_query(\"[].{hosttype: instance.annotation.hosttype, hostname: item.guest_name, private_ip: item.ip_address, inventory_ip: item.ip_address}\") | default([]) }}" diff --git a/dynamic_inventory/tasks/main.yml b/dynamic_inventory/tasks/main.yml index 98def030..4c041104 100644 --- a/dynamic_inventory/tasks/main.yml +++ b/dynamic_inventory/tasks/main.yml @@ -1,12 +1,7 @@ --- -- name: dynamic_inventory | Derive dynamic inventory for AWS cluster - include_tasks: aws.yml - when: cluster_vars.type == "aws" - -- name: dynamic_inventory | Derive dynamic inventory for GCP cluster - include_tasks: gcp.yml - when: cluster_vars.type == "gcp" +- name: "dynamic_inventory | Derive dynamic inventory for {{cluster_vars.type}} cluster" + include_tasks: "{{cluster_vars.type}}.yml" - assert: { that: "dynamic_inventory_flat is defined", msg: "dynamic_inventory_flat is not defined" } @@ -27,10 +22,10 @@ - name: dynamic_inventory | Add hosts to dynamic inventory add_host: name: "{{ item.hostname }}" - groups: ["{{ item.hosttype }}","{{ cluster_name }}","{{ clusterid }}","{{ item.regionzone }}"] + groups: "{{ item.hosttype }},{{ cluster_name }},{{ clusterid }}{%- if 'regionzone' in item -%},{{ item.regionzone }}{%- endif -%}" ansible_host: "{{ item.inventory_ip }}" hosttype: "{{ item.hosttype }}" - regionzone: "{{ item.regionzone }}" + regionzone: "{{ item.regionzone | default(omit) }}" with_items: "{{ dynamic_inventory_flat }}" - name: dynamic_inventory | stat the inventory_file path @@ -45,7 +40,7 @@ {% if groupname not in ["all", "ungrouped"] -%} [{{ groupname }}] {% for hostname in groups[groupname] %} - {{ hostname }} ansible_host={{hostvars[hostname].ansible_host}} hosttype={{ hostvars[hostname].hosttype }} regionzone={{ hostvars[hostname].regionzone }} + {{ hostname }} ansible_host={{hostvars[hostname].ansible_host}} hosttype={{ hostvars[hostname].hosttype }} {%- if 'regionzone' in hostvars[hostname] -%}regionzone={{ hostvars[hostname].regionzone }}{%- endif -%} {% endfor %} {% endif %} @@ -57,3 +52,4 @@ - name: dynamic_inventory | current inventory_hostnames debug: msg="{{ lookup('inventory_hostnames','all').split(',') }}" + when: lookup('inventory_hostnames','all') | length diff --git a/redeploy/__common/tasks/poweroff_vms.yml b/redeploy/__common/tasks/poweroff_vms.yml index b6cdad56..38e06772 100644 --- a/redeploy/__common/tasks/poweroff_vms.yml +++ b/redeploy/__common/tasks/poweroff_vms.yml @@ -54,4 +54,28 @@ retries: 300 with_items: "{{r__gcp_compute_instance.results}}" when: cluster_vars.type == "gcp" + + + - name: poweroff_vms | Power-off vmware VM(s) and set maintenance_mode=true + block: + - name: poweroff_vms | Set maintenance_mode label on esxifree VM(s) + esxifree_guest: + hostname: "{{ cluster_vars.esxi_ip }}" + username: "{{ cluster_vars.username }}" + password: "{{ 
cluster_vars.password }}" + name: "{{item.name}}" + state: present + annotation: "{{ item.tagslabels | combine({'maintenance_mode': 'true'}) }}" + with_items: "{{ hosts_to_stop }}" + + - name: poweroff_vms | Power-off esxifree VM(s) + esxifree_guest: + hostname: "{{ cluster_vars.esxi_ip }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + name: "{{item.name}}" + state: shutdownguest + with_items: "{{ hosts_to_stop }}" + + when: cluster_vars.type == "esxifree" when: hosts_to_stop | length \ No newline at end of file diff --git a/redeploy/__common/tasks/poweron_vms.yml b/redeploy/__common/tasks/poweron_vms.yml index 551c4fad..363a9aee 100644 --- a/redeploy/__common/tasks/poweron_vms.yml +++ b/redeploy/__common/tasks/poweron_vms.yml @@ -44,4 +44,16 @@ retries: 300 with_items: "{{r__gcp_compute_instance.results}}" when: cluster_vars.type == "gcp" + + + - name: poweron_vms | Power-on esxifree VM(s) + block: + - name: poweron_vms | Power-on esxifree VM(s) + esxifree_guest: + hostname: "{{ cluster_vars.esxi_ip }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + name: "{{item.name}}" + state: powered-on + when: cluster_vars.type == "esxifree" when: hosts_to_start | length \ No newline at end of file diff --git a/redeploy/__common/tasks/set_lifecycle_state_label.yml b/redeploy/__common/tasks/set_lifecycle_state_label.yml index 045171e2..93c582be 100644 --- a/redeploy/__common/tasks/set_lifecycle_state_label.yml +++ b/redeploy/__common/tasks/set_lifecycle_state_label.yml @@ -27,4 +27,16 @@ labels: "{{ item.tagslabels | combine({'lifecycle_state': new_state}) }}" with_items: "{{ hosts_to_relabel }}" when: cluster_vars.type == "gcp" + + + - name: set_lifecycle_state_label | Change lifecycle_state label on esxifree VM + esxifree_guest: + hostname: "{{ cluster_vars.esxi_ip }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + name: "{{item.name}}" + state: present + annotation: "{{ item.tagslabels | combine({'lifecycle_state': new_state}) }}" + with_items: "{{ hosts_to_relabel }}" + when: cluster_vars.type == "esxifree" when: hosts_to_relabel | length \ No newline at end of file diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_diskinfo_esxifree.yml b/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_diskinfo_esxifree.yml new file mode 100644 index 00000000..40e7c103 --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_diskinfo_esxifree.yml @@ -0,0 +1,36 @@ +--- + +- name: _get_diskinfo_esxifree | hosts_to_stop + debug: msg="{{hosts_to_stop}}" + +- name: _get_diskinfo_esxifree | vmware_guest_disk_info + vmware_guest_disk_info: + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + hostname: "{{ cluster_vars.esxi_ip }}" + datacenter: ha-datacenter + validate_certs: no + name: "{{item.name}}" + with_items: "{{hosts_to_stop}}" + register: r__vmware_guest_disk_info + +#- name: _get_diskinfo_esxifree | debug r__vmware_guest_disk_info +# debug: msg={{r__vmware_guest_disk_info}} + +- assert: { that: "r__vmware_guest_disk_info | json_query(\"results[].guest_disk_info.*[] | [?backing_datastore!='\" + cluster_vars.datastore + \"']\") | length == 0", msg: "Move is only possible if disks are on the same datastore." 
} + when: _scheme_rmvm_keepdisk_only__copy_or_move == "move" + +- name: _get_diskinfo_esxifree | augment cluster_host_redeploying's auto_volumes with source disk info + set_fact: + cluster_host_redeploying: | + {% set res = _cluster_host_redeploying_loopvar -%} + {%- for autovol in res.auto_volumes -%} + {%- for host_to_stop_diskinfo_result in r__vmware_guest_disk_info.results -%} + {%- if res.hostname | regex_replace('-(?!.*-).*') == host_to_stop_diskinfo_result.item.name | regex_replace('-(?!.*-).*') -%} + {%- for host_to_stop_diskinfo in host_to_stop_diskinfo_result.guest_disk_info | to_json | from_json | json_query('*| [?unit_number!=`0` && backing_type==\'FlatVer2\' && contains(backing_filename, \'--' + autovol.volname + '.vmdk\')]') -%} + {%- set _ = autovol.update({'volume_size': (host_to_stop_diskinfo.capacity_in_bytes/1073741824)|int, 'src': {'backing_filename': host_to_stop_diskinfo.backing_filename, 'copy_or_move': _scheme_rmvm_keepdisk_only__copy_or_move }}) -%} + {%- endfor -%} + {%- endif -%} + {%- endfor -%} + {%- endfor -%} + {{res}} diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype.yml b/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype.yml new file mode 100644 index 00000000..1bebc776 --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype.yml @@ -0,0 +1,23 @@ +--- + +- name: set hosts_to_redeploy if canary==start + set_fact: hosts_to_redeploy={{(cluster_hosts_target_by_hosttype[hosttype] | sort(attribute='hostname'))[:1]}} + when: (canary is defined and canary=="start") + +- name: set hosts_to_redeploy if canary==finish + set_fact: hosts_to_redeploy={{(cluster_hosts_target_by_hosttype[hosttype] | sort(attribute='hostname'))[1:]}} + when: (canary is defined and canary=="finish") + +- name: set hosts_to_redeploy if canary==none + set_fact: hosts_to_redeploy={{(cluster_hosts_target_by_hosttype[hosttype] | sort(attribute='hostname'))}} + when: (canary is defined and canary=="none") + +- debug: msg="Canary redeploy ({{canary}}) selected; deleting and redeploying [{{hosts_to_redeploy | json_query('[].hostname') | join(', ')}}]" + when: (canary is defined) + + +- name: Run redeploy per host. Delete one at a time, then reprovision. 
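+  ## Illustrative example only (hypothetical hostnames): if cluster_hosts_target_by_hosttype[hosttype], sorted by hostname,
+  ## is [sys-a0, sys-a1, sys-b0], then canary=start redeploys only sys-a0 ([:1]), canary=finish redeploys sys-a1 and
+  ## sys-b0 ([1:]), and canary=none redeploys all three.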
+ include_tasks: by_hosttype_by_host.yml + with_items: "{{ hosts_to_redeploy }}" + loop_control: + loop_var: _cluster_host_redeploying_loopvar diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype_by_host.yml b/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype_by_host.yml new file mode 100644 index 00000000..14276ef3 --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype_by_host.yml @@ -0,0 +1,51 @@ +--- + +- debug: msg="Attempting to redeploy {{_cluster_host_redeploying_loopvar.hostname}}" + +- name: by_hosttype_by_host | stop/ remove previous instance + block: + - name: by_hosttype_by_host | run predeleterole role + include_role: + name: "{{predeleterole}}" + when: predeleterole is defined and predeleterole != "" + + - name: by_hosttype_by_host | Power off old VM + include_role: + name: clusterverse/redeploy/__common + tasks_from: poweroff_vms.yml + + - name: by_hosttype_by_host | re-acquire the dynamic inventory + include_role: + name: clusterverse/dynamic_inventory + + - name: by_hosttype_by_host | re-acquire cluster_hosts_target and cluster_hosts_state + import_role: + name: clusterverse/cluster_hosts + + - name: by_hosttype_by_host | create cluster_host_redeploying with the disk info from hosts_to_stop + include_role: + name: "{{role_path}}" + tasks_from: "_add_diskinfo_{{cluster_vars.type}}.yml" + vars: + _root_cluster_host_redeploying: "{{_cluster_host_redeploying_loopvar.hostname | regex_replace('-(?!.*-).*')}}" #Remove the cluster_suffix from the hostname + hosts_to_stop: "{{ cluster_hosts_state | to_json | from_json | json_query(\"[?tagslabels.lifecycle_state=='retiring' && starts_with(name, '\" + _root_cluster_host_redeploying + \"')]\") }}" + +- name: by_hosttype_by_host | cluster_host_redeploying + debug: msg={{cluster_host_redeploying}} + +- name: "by_hosttype_by_host | Run {{mainclusteryml}} to add {{cluster_host_redeploying.hostname}} to cluster" + shell: "{{ (argv | join(' ')) | regex_replace('redeploy.yml', mainclusteryml) }} -e cluster_suffix={{cluster_suffix}} -e '{'cluster_hosts_target': [{{cluster_host_redeploying | to_json}}]}'" + register: r__mainclusteryml + no_log: True + ignore_errors: yes +- debug: msg="{{[r__mainclusteryml.stdout_lines] + [r__mainclusteryml.stderr_lines]}}" + failed_when: r__mainclusteryml is failed +# when: r__mainclusteryml is failed or (debug_nested_log_output is defined and debug_nested_log_output|bool) + +- name: by_hosttype_by_host | re-acquire the dynamic inventory + include_role: + name: clusterverse/dynamic_inventory + +- name: by_hosttype_by_host | re-acquire cluster_hosts_target and cluster_hosts_state + import_role: + name: clusterverse/cluster_hosts diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml b/redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml new file mode 100644 index 00000000..67aeba86 --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml @@ -0,0 +1,69 @@ +--- + +- name: Preflight check + block: + - assert: { that: "_scheme_rmvm_keepdisk_only__copy_or_move is defined and _scheme_rmvm_keepdisk_only__copy_or_move in ['copy', 'move']", msg: "ERROR - _scheme_rmvm_keepdisk_only__copy_or_move must be defined and set to either 'copy' or 'move'" } + + - assert: { that: "non_current_hosts | length == 0", msg: "ERROR - All VMs must be in the 'current' lifecycle_state. 
Those not [{{non_current_hosts | join(',')}}]" } + vars: + non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" + when: canary=="start" or canary=="none" + + - assert: + that: "{{chs_hosts | difference(chf_hosts) | length==0}}" + msg: "Cannot use this scheme to redeploy to smaller cluster; [{{ chs_hosts | join(',') }}] > [{{ chf_hosts | join(',') }}]" + vars: + chf_hosts: "{{ cluster_hosts_target | json_query(\"[].hostname\") | map('regex_replace', '-(?!.*-).*') | list }}" + chs_hosts: "{{ cluster_hosts_state | json_query(\"[].name\") | map('regex_replace', '-(?!.*-).*') | list }}" + +- name: Redeploy setup + block: + - name: Change lifecycle_state label from 'current' to 'retiring' + include_role: + name: clusterverse/redeploy/__common + tasks_from: set_lifecycle_state_label.yml + vars: + hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current']\") }}" + new_state: "retiring" + when: ('retiring' not in (cluster_hosts_state | map(attribute='tagslabels.lifecycle_state'))) + + - name: re-acquire cluster_hosts_target and cluster_hosts_state + include_role: + name: clusterverse/cluster_hosts + public: yes + + - assert: { that: "cluster_hosts_state | json_query(\"[?tagslabels.cluster_suffix == '\"+ cluster_suffix +\"']\") | length == 0", msg: "Please ensure cluster_suffix ({{cluster_suffix}}) is not already set on the cluster" } + when: cluster_suffix is defined + when: (canary=="start" or canary=="none") + +- name: Run redeploy per hosttype. Create one at a time, then stop previous. + include_tasks: by_hosttype.yml + with_items: "{{ myhosttypes_array }}" + loop_control: + loop_var: hosttype + vars: + cluster_hosts_target_by_hosttype: "{{cluster_hosts_target | dict_agg('hosttype')}}" + myhosttypes_array: "{%- if myhosttypes is defined -%} {{ myhosttypes.split(',') }} {%- else -%} {{ cluster_hosts_target_by_hosttype.keys() | list }} {%- endif -%}" + when: canary!="tidy" + + +- name: "Tidy up powered-down, non-current instances. NOTE: Must do clean_dns first, because both clean_dns and clean_vms have the cluster_hosts role as a dependency, which when run after clean_vms, will be empty." + block: + - assert: { that: "'current' in (cluster_hosts_state | map(attribute='tagslabels.lifecycle_state'))", msg: "ERROR - Cannot tidy when there are no machines in the 'current' lifecycle_state. Please use '-e clean=_all_'." } + + - include_role: + name: clusterverse/clean + tasks_from: clean_dns.yml + when: (hosts_to_clean | length) and (cluster_vars.dns_server is defined and cluster_vars.dns_server != "") and (cluster_vars.dns_user_domain is defined and cluster_vars.dns_user_domain != "") + + - include_role: + name: clusterverse/clean + tasks_from: "clean_vms_{{cluster_vars.type}}.yml" + when: (hosts_to_clean | length) + + - debug: + msg: "tidy | No hosts to tidy. Only powered-down, non-current machines with be tidied; to clean other machines, please use the '-e clean=' extra variable." 
+ when: hosts_to_clean | length == 0 + vars: + hosts_to_clean: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current' && !(contains('RUNNING,running', instance_state))]\") }}" + when: canary=="tidy" or ((canary=="none" or canary=="finish") and canary_tidy_on_success is defined and canary_tidy_on_success|bool) diff --git a/redeploy/tasks/main.yml b/redeploy/tasks/main.yml index 90f3b9ed..e4788e94 100644 --- a/redeploy/tasks/main.yml +++ b/redeploy/tasks/main.yml @@ -4,7 +4,8 @@ block: - assert: { that: "clean is not defined", msg: "Must not set the 'clean' variable for a redeploy" } - assert: { that: "canary is defined and (canary is defined and canary in ['start', 'finish', 'none', 'tidy', 'revert'])", msg: "Canary must be 'start', 'finish', 'none', 'tidy' or 'revert'" } - - assert: { that: "redeploy_scheme is defined" } + - assert: { that: "redeploy_scheme is defined and redeploy_scheme in redeploy_schemes_supported" } + - assert: { that: "cluster_hosts_state | length", msg: "Redeploy only possible with an existing cluster." } - name: "Run the {{redeploy_scheme}} redploy scheme" include_role: From 90344b2051c19a52397e56fbc44f5d33a83f0709 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Sun, 23 Aug 2020 07:36:15 +0100 Subject: [PATCH 02/58] Add esxifree_guest.py as an included library dependency --- _dependencies/library/esxifree_guest.py | 1071 +++++++++++++++++ _dependencies/library/esxifree_guest_LICENSE | 29 + .../library/esxifree_guest_README.md | 53 + 3 files changed, 1153 insertions(+) create mode 100644 _dependencies/library/esxifree_guest.py create mode 100644 _dependencies/library/esxifree_guest_LICENSE create mode 100644 _dependencies/library/esxifree_guest_README.md diff --git a/_dependencies/library/esxifree_guest.py b/_dependencies/library/esxifree_guest.py new file mode 100644 index 00000000..481dbbf9 --- /dev/null +++ b/_dependencies/library/esxifree_guest.py @@ -0,0 +1,1071 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) + +from __future__ import absolute_import, division, print_function + +__metaclass__ = type + +ANSIBLE_METADATA = {'metadata_version': '1.1', 'status': ['preview'], 'supported_by': 'community'} + +DOCUMENTATION = r''' +--- +module: esxifree_guest +short_description: Manages virtual machines in ESXi without a dependency on the vSphere/ vCenter API. +description: > + This module can be used to create new virtual machines from templates or other virtual machines, + manage power state of virtual machine such as power on, power off, suspend, shutdown, reboot, restart etc., +version_added: '2.7' +author: +- Dougal Seeley (ansible@dougalseeley.com) +requirements: +- python >= 2.7 +- paramiko +notes: + - Please make sure that the user used for esxifree_guest should have correct level of privileges. + - Tested on vSphere 6.7 +options: + hostname: + description: + - The hostname or IP address of the ESXi server. + required: true + type: str + username: + description: + - The username to access the ESXi server at C(hostname). + required: true + type: str + password: + description: + - The password of C(username) for the ESXi server, or the password for the private key (if required). + required: true + type: str + state: + description: + - Specify the state the virtual machine should be in. + - 'If C(state) is set to C(present) and virtual machine exists, ensure the virtual machine + configurations conforms to task arguments.' 
+ - 'If C(state) is set to C(absent) and virtual machine exists, then the specified virtual machine + is removed with its associated components.' + - 'If C(state) is set to one of the following C(poweredon), C(poweredoff), C(present) + and virtual machine does not exists, then virtual machine is deployed with given parameters.' + - 'If C(state) is set to C(poweredon) and virtual machine exists with powerstate other than powered on, + then the specified virtual machine is powered on.' + - 'If C(state) is set to C(poweredoff) and virtual machine exists with powerstate other than powered off, + then the specified virtual machine is powered off.' + - 'If C(state) is set to C(shutdownguest) and virtual machine exists, then the virtual machine is shutdown.' + - 'If C(state) is set to C(rebootguest) and virtual machine exists, then the virtual machine is rebooted.' + choices: [ present, absent, poweredon, poweredoff, shutdownguest, rebootguest ] + default: present + name: + description: + - Name of the virtual machine to work with. + - Virtual machine names in ESXi are unique + - This parameter is required, if C(state) is set to C(present) and virtual machine does not exists. + - This parameter is case sensitive. + type: str + moid: + description: + - Managed Object ID of the virtual machine to manage + - This is required if C(name) is not supplied. + - If virtual machine does not exists, then this parameter is ignored. + - Will be ignored on virtual machine creation + type: str + template: + description: + - Template or existing virtual machine used to create new virtual machine. + - If this value is not set, virtual machine is created without using a template. + - If the virtual machine already exists, this parameter will be ignored. + - This parameter is case sensitive. + type: str + hardware: + description: + - Manage virtual machine's hardware attributes. + type: dict + suboptions: + version: + description: + - The Virtual machine hardware version. Default is 15 (ESXi 6.7U2 and onwards). + type: int + default: 15 + required: false + num_cpus: + description: + - Number of CPUs. + - C(num_cpus) must be a multiple of C(num_cpu_cores_per_socket). + type: int + default: 2 + required: false + num_cpu_cores_per_socket: + description: + - Number of Cores Per Socket. + type: int + default: 1 + required: false + hotadd_cpu: + description: + - Allow virtual CPUs to be added while the virtual machine is running. + type: bool + required: false + memory_mb: + description: + - Amount of memory in MB. + type: int + default: 2048 + required: false + memory_reservation_lock: + description: + - If set true, memory resource reservation for the virtual machine + will always be equal to the virtual machine's memory size. + type: bool + required: false + hotadd_memory: + description: + - Allow memory to be added while the virtual machine is running. + type: bool + required: false + guest_id: + description: + - Set the guest ID. + - This parameter is case sensitive. + - 'Examples:' + - " virtual machine with RHEL7 64 bit, will be 'rhel7-64'" + - " virtual machine with CentOS 7 (64-bit), will be 'centos7-64'" + - " virtual machine with Debian 9 (Stretch) 64 bit, will be 'debian9-64'" + - " virtual machine with Ubuntu 64 bit, will be 'ubuntu-64'" + - " virtual machine with Windows 10 (64 bit), will be 'windows9-64'" + - " virtual machine with Other (64 bit), will be 'other-64'" + - This field is required when creating a virtual machine, not required when creating from the template. 
+ type: str + default: ubuntu-64 + disks: + description: + - A list of disks to add (or create via cloning). + - Resizing disks is not supported. + - Removing existing disks of the virtual machine is not supported. + required: false + type: list + suboptions: + boot: + description: + - Indicates that this is a boot disk. + required: false + default: no + type: bool + size_gb: + description: Specifies the size of the disk in base-2 GB. + type: int + required: true + type: + description: + - Type of disk provisioning + choices: [thin, thick, eagerzeroedthick] + type: str + required: false + default: thin + volname: + description: + - Volume name. This will be a suffix of the vmdk file, e.g. "testdisk" on a VM named "mynewvm", would yield mynewvm--testdisk.vmdk + type: str + required: true + src: + description: + - The source disk from which to create this disk. + required: false + type: dict + suboptions: + backing_filename: + description: + - The source file, e.g. "[datastore1] linux_dev/linux_dev--webdata.vmdk" + type: str + copy_or_move + description: + - Whether to copy (clone) from the source datastore, or move the file. Move will fail if source and destination datastore differ. + choices: [copy, move] + + cdrom: + description: + - A CD-ROM configuration for the virtual machine. + - 'Valid attributes are:' + - ' - C(type) (string): The type of CD-ROM, valid options are C(none), C(client) or C(iso). With C(none) the CD-ROM will be disconnected but present.' + - ' - C(iso_path) (string): The datastore path to the ISO file to use, in the form of C([datastore1] path/to/file.iso). Required if type is set C(iso).' + wait: + description: + - On creation, wait for the instance to obtain its IP address before returning. + type: bool + required: false + default: true + wait_timeout: + description: + - How long before wait gives up, in seconds. + type: int + required: false + default: 180 + force: + description: + - Delete the existing host if it exists. Use with extreme care! + type: bool + required: false + default: false + customvalues: + description: + - Define a list of custom values to set on virtual machine. + - A custom value object takes two fields C(key) and C(value). + - Incorrect key and values will be ignored. + version_added: '2.3' + cloudinit_userdata: + description: + - A list of userdata (per user) as defined U(https://cloudinit.readthedocs.io/en/latest/topics/examples.html). The + VM must already have cloud-init-vmware-guestinfo installed U(https://github.com/vmware/cloud-init-vmware-guestinfo) + networks: + description: + - A list of networks (in the order of the NICs). + - Removing NICs is not allowed, while reconfiguring the virtual machine. + - All parameters and VMware object names are case sensetive. + - 'One of the below parameters is required per entry:' + - ' - C(networkName) (string): Name of the portgroup for this interface. + - ' - C(virtualDev) (string): Virtual network device (one of C(e1000e), C(vmxnet3) (default), C(sriov)).' + - 'Optional parameters per entry (used for OS customization):' + - ' - C(cloudinit_ethernets) (dict): A list of C(ethernets) within the definition of C(Networking Config Version 2) + defined in U(https://cloudinit.readthedocs.io/en/latest/topics/network-config-format-v2.html)'. The + VM must already have cloud-init-vmware-guestinfo installed U(https://github.com/vmware/cloud-init-vmware-guestinfo) + datastore: + description: + - Specify datastore or datastore cluster to provision virtual machine. 
+ type: str + required: true + +''' +EXAMPLES = r''' +- name: Create a virtual machine + esxifree_guest: + hostname: "192.168.1.3" + username: "svc" + password: "my_passsword" + datastore: "datastore1" + name: "test_asdf" + state: present + guest_id: ubuntu-64 + hardware: {"version": "15", "num_cpus": "2", "memory_mb": "2048"} + cloudinit_userdata: + - name: dougal + primary_group: dougal + sudo: "ALL=(ALL) NOPASSWD:ALL" + groups: "admin" + home: "/media/filestore/home/dougal" + ssh_import_id: None + lock_passwd: false + passwd: $6$j212wezy$7...YPYb2F + ssh_authorized_keys: ['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACA+.................GIMhdojtl6mzVn38vXMzSL29LQ== ansible@dougalseeley.com'] + disks: + - {"boot": true, "size_gb": 16, "type": "thin"} + - {"size_gb": 2, "type": "thin", "volname": "test_new"} + - {"size_gb": 1, "type": "thin", "volname": "test_clone", "src": {"backing_filename": "[datastore1] linux_dev/linux_dev--webdata.vmdk", "copy_or_move": "copy"}}], + cdrom: {"type": "iso", "iso_path": "/vmfs/volumes/4tb-evo860-ssd/ISOs/ubuntu-18.04.4-server-amd64.iso"}, + networks: + - networkName: VM Network + virtualDev: vmxnet3 + cloudinit_ethernets: + eth0: + addresses: ["192.168.1.8/25"] + dhcp4: false + gateway4: 192.168.1.1 + nameservers: + addresses: ["192.168.1.2", "8.8.8.8", "8.8.4.4"] + search: ["local.dougalseeley.com"] + delegate_to: localhost + +- name: Clone a virtual machine + esxifree_guest: + hostname: "192.168.1.3" + username: "svc" + password: "my_passsword" + datastore: "datastore1" + template: "ubuntu1804-packer-template" + name: "test_asdf" + state: present + guest_id: ubuntu-64 + hardware: {"version": "15", "num_cpus": "2", "memory_mb": "2048"} + cloudinit_userdata: + - default + - name: dougal + primary_group: dougal + sudo: "ALL=(ALL) NOPASSWD:ALL" + groups: "admin" + home: "/media/filestore/home/dougal" + ssh_import_id: None + lock_passwd: true + ssh_authorized_keys: ['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACA+.................GIMhdojtl6mzVn38vXMzSL29LQ== ansible@dougalseeley.com'] + disks: + - {"size_gb": 2, "type": "thin", "volname": "test_new"} + - {"size_gb": 1, "type": "thin", "volname": "test_clone", "src": {"backing_filename": "[datastore1] linux_dev/linux_dev--webdata.vmdk", "copy_or_move": "copy"}}], + networks: + - networkName: VM Network + virtualDev: vmxnet3 + cloudinit_ethernets: + eth0: + addresses: ["192.168.1.8/25"] + dhcp4: false + gateway4: 192.168.1.1 + nameservers: + addresses: ["192.168.1.2", "8.8.8.8", "8.8.4.4"] + search: ["local.dougalseeley.com"] + delegate_to: localhost + +- name: Delete a virtual machine + esxifree_guest: + hostname: "{{ esxi_ip }}" + username: "{{ username }}" + password: "{{ password }}" + name: test_vm_0001 + state: absent + delegate_to: localhost +''' + +RETURN = r''' +instance: + description: metadata about the new virtual machine + returned: always + type: dict + sample: None +''' + +import time +import re +import json +import socket +import collections +import paramiko +import sys +import base64 +import yaml +import errno # For the python2.7 IOError, because FileNotFound is for python3 + +# define a custom yaml representer to force quoted strings +yaml.add_representer(str, lambda dumper, data: dumper.represent_scalar('tag:yaml.org,2002:str', data, style='"')) + +# For the soap client +try: + from urllib.request import Request, build_opener, HTTPSHandler, HTTPCookieProcessor + from urllib.response import addinfourl + from urllib.error import HTTPError + from http.cookiejar import CookieJar + from http.client import 
HTTPResponse +except ImportError: + from urllib2 import Request, build_opener, HTTPError, HTTPSHandler, HTTPCookieProcessor, addinfourl + from cookielib import CookieJar + from httplib import HTTPResponse +import ssl +import xml.dom.minidom + +if sys.version_info[0] < 3: + from io import BytesIO as StringIO +else: + from io import StringIO + +# paramiko.util.log_to_file("paramiko.log") +# paramiko.common.logging.basicConfig(level=paramiko.common.DEBUG) + +try: + from ansible.module_utils.basic import AnsibleModule +except: + pass + + +# Executes soap requests on the remote host. +class vmw_soap_client(object): + def __init__(self, host, username, password): + self.vmware_soap_session_cookie = None + self.host = host + response, cookies = self.send_req("<_this>ServiceInstance") + sessionManager_name = xml.dom.minidom.parseString(response.read()).getElementsByTagName("sessionManager")[0].firstChild.data + + response, cookies = self.send_req("<_this>" + sessionManager_name + "" + username + "" + password + "") + self.vmware_soap_session_cookie = cookies['vmware_soap_session'].value + + def send_req(self, envelope_body=None): + envelope = '' + '' + str(envelope_body) + '' + cj = CookieJar() + req = Request( + url='https://' + self.host + '/sdk/vimService.wsdl', data=envelope.encode(), + headers={"Content-Type": "text/xml", "SOAPAction": "urn:vim25/6.7.3", "Accept": "*/*", "Cookie": "vmware_client=VMware; vmware_soap_session=" + str(self.vmware_soap_session_cookie)}) + + opener = build_opener(HTTPSHandler(context=ssl._create_unverified_context()), HTTPCookieProcessor(cj)) + try: + response = opener.open(req, timeout=30) + except HTTPError as err: + response = str(err) + cookies = {i.name: i for i in list(cj)} + return (response[0] if isinstance(response, list) else response, cookies) # If the cookiejar contained anything, we get a list of two responses + + def wait_for_task(self, task, timeout=30): + time_s = int(timeout) + while time_s > 0: + response, cookies = self.send_req('<_this type="PropertyCollector">ha-property-collectorTaskfalseinfo' + task + 'false') + if isinstance(response, HTTPResponse) or isinstance(response, addinfourl): + xmldom = xml.dom.minidom.parseString(response.read()) + if len(xmldom.getElementsByTagName('state')): + if xmldom.getElementsByTagName('state')[0].firstChild.data == 'success': + response = xmldom.getElementsByTagName('state')[0].firstChild.data + break + elif xmldom.getElementsByTagName('state')[0].firstChild.data == 'error': + response = str(xmldom.toxml()) + break + else: + time.sleep(1) + time_s = time_s - 1 + else: + break + return response + + +# Executes a command on the remote host. 
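+# A minimal usage sketch (the hostname and credentials below are placeholders, not part of this module):
+#   cnx = SSHCmdExec(hostname="192.168.1.3", username="svc", password="my_password")
+#   (stdin, stdout, stderr) = cnx.exec_command("vim-cmd vmsvc/getallvms")
+#   print(stdout.readlines())
+# exec_command() raises IOError when the remote command exits non-zero, so callers only need to read stdout on success.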
+class SSHCmdExec(object): + def __init__(self, hostname, username=None, password=None, pkeyfile=None, pkeystr=None): + self.hostname = hostname + + try: + if pkeystr and pkeystr != "": + pkey_fromstr = paramiko.RSAKey.from_private_key(StringIO(pkeystr), password) + if pkeyfile and pkeyfile != "": + pkey_fromfile = paramiko.RSAKey.from_private_key_file(pkeyfile, password) + except paramiko.ssh_exception.PasswordRequiredException as auth_err: + print("Authentication failure, Password required" + "\n\n" + str(auth_err)) + exit(1) + except paramiko.ssh_exception.SSHException as auth_err: + print("Authentication failure, SSHException" + "\n\n" + str(auth_err)) + exit(1) + except: + print("Unexpected error: ", sys.exc_info()[0]) + raise + else: + if pkeystr: + self.pkey = pkey_fromstr + if pkeyfile: + if pkey_fromstr != pkey_fromfile: + print("Both private key file and private key string specified and not equal!") + exit(1) + elif pkeyfile: + self.pkey = pkey_fromfile + + # Create instance of SSHClient object + self.remote_conn_client = paramiko.SSHClient() + self.remote_conn_client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + + # initiate SSH connection + try: + if hasattr(self, 'pkey'): + self.remote_conn_client.connect(hostname=hostname, username=username, pkey=self.pkey, timeout=10, look_for_keys=False, allow_agent=False) + else: + self.remote_conn_client.connect(hostname=hostname, username=username, password=password, timeout=10, look_for_keys=False, allow_agent=False) + except socket.error as sock_err: + print("Connection timed-out to " + hostname) # + "\n\n" + str(sock_err) + exit(1) + except paramiko.ssh_exception.AuthenticationException as auth_err: + print("Authentication failure, unable to connect to " + hostname + " as " + username + "\n\n" + str(auth_err) + "\n\n" + str(sys.exc_info()[0])) # + str(auth_err)) + exit(1) + except: + print("Unexpected error: ", sys.exc_info()[0]) + raise + + # print("SSH connection established to " + hostname + " as " + username) + + def get_sftpClient(self): + return self.remote_conn_client.open_sftp() + + # execute the command and wait for it to finish + def exec_command(self, command_string): + # print("Command is: {0}".format(command_string)) + + (stdin, stdout, stderr) = self.remote_conn_client.exec_command(command_string) + if stdout.channel.recv_exit_status() != 0: # Blocking call + raise IOError(stderr.read()) + + return stdin, stdout, stderr + + +class esxiFreeScraper(object): + vmx_skeleton = collections.OrderedDict() + vmx_skeleton['.encoding'] = "UTF-8" + vmx_skeleton['config.version'] = "8" + vmx_skeleton['pciBridge0.present'] = "TRUE" + vmx_skeleton['svga.present'] = "TRUE" + vmx_skeleton['svga.autodetect'] = "TRUE" + vmx_skeleton['pciBridge4.present'] = "TRUE" + vmx_skeleton['pciBridge4.virtualDev'] = "pcieRootPort" + vmx_skeleton['pciBridge4.functions'] = "8" + vmx_skeleton['pciBridge5.present'] = "TRUE" + vmx_skeleton['pciBridge5.virtualDev'] = "pcieRootPort" + vmx_skeleton['pciBridge5.functions'] = "8" + vmx_skeleton['pciBridge6.present'] = "TRUE" + vmx_skeleton['pciBridge6.virtualDev'] = "pcieRootPort" + vmx_skeleton['pciBridge6.functions'] = "8" + vmx_skeleton['pciBridge7.present'] = "TRUE" + vmx_skeleton['pciBridge7.virtualDev'] = "pcieRootPort" + vmx_skeleton['pciBridge7.functions'] = "8" + vmx_skeleton['vmci0.present'] = "TRUE" + vmx_skeleton['hpet0.present'] = "TRUE" + vmx_skeleton['floppy0.present'] = "FALSE" + vmx_skeleton['usb.present'] = "TRUE" + vmx_skeleton['ehci.present'] = "TRUE" + 
vmx_skeleton['tools.syncTime'] = "TRUE" + vmx_skeleton['scsi0.virtualDev'] = "pvscsi" + vmx_skeleton['scsi0.present'] = "TRUE" + + def __init__(self, hostname, username='root', password=None, name=None, moid=None): + self.soap_client = vmw_soap_client(host=hostname, username=username, password=password) + self.esxiCnx = SSHCmdExec(hostname=hostname, username=username, password=password) + self.name, self.moid = self.get_vm(name, moid) + if self.moid is None: + self.name = name + + def get_vm(self, name=None, moid=None): + (stdin, stdout, stderr) = self.esxiCnx.exec_command("vim-cmd vmsvc/getallvms") + allVms = stdout.readlines() + for vm in allVms: + vm_params = re.search('^(?P\d+)\s+(?P.*?)\s+(?P\[.*?\])\s+(?P.*?)\s+(?P.*?)\s+(?P.*?)(:\s+(?P.*))?$', vm) + if vm_params and vm_params.group('vmname') and vm_params.group('vmid') and ((name and name == vm_params.group('vmname')) or (moid and moid == vm_params.group('vmid'))): + return vm_params.group('vmname'), vm_params.group('vmid') + return None, None + + def get_vmx(self, moid): + (stdin, stdout, stderr) = self.esxiCnx.exec_command("vim-cmd vmsvc/get.filelayout " + str(moid) + " | grep 'vmPathName = ' | sed -r 's/^\s+vmPathName = \"(.*?)\",/\\1/g'") + vmxPathName = stdout.read().decode('UTF-8').lstrip("\r\n").rstrip(" \r\n") + vmxPath = re.sub(r"^\[(.*?)]\s+(.*?)$", r"/vmfs/volumes/\1/\2", vmxPathName) + + if vmxPath: + sftp_cnx = self.esxiCnx.get_sftpClient() + vmxFileDict = {} + for vmxline in sftp_cnx.file(vmxPath).readlines(): + vmxline_params = re.search('^(?P.*?)\s*=\s*(?P.*)$', vmxline) + if vmxline_params and vmxline_params.group('key') and vmxline_params.group('value'): + vmxFileDict[vmxline_params.group('key').strip(" \"\r\n").lower()] = vmxline_params.group('value').strip(" \"\r\n") + + return vmxPath, vmxFileDict + + def put_vmx(self, vmxDict, vmxPath): + # print(json.dumps(vmxDict, sort_keys=True, indent=4, separators=(',', ': '))) + vmxDict = collections.OrderedDict(sorted(vmxDict.items())) + vmxStr = StringIO() + for vmxKey, vmxVal in vmxDict.items(): + vmxStr.write(str(vmxKey.lower()) + " = " + "\"" + str(vmxVal) + "\"\n") + vmxStr.seek(0) + sftp_cnx = self.esxiCnx.get_sftpClient() + try: + sftp_cnx.stat(vmxPath) + sftp_cnx.remove(vmxPath) + except IOError as e: # python 2.7 + if e.errno == errno.ENOENT: + pass + except FileNotFoundError: # python 3.x + pass + sftp_cnx.putfo(vmxStr, vmxPath, file_size=0, callback=None, confirm=True) + + def create_vm(self, vmTemplate=None, annotation=None, datastore=None, hardware=None, guest_id=None, disks=None, cdrom=None, customvalues=None, networks=None, cloudinit_userdata=None): + vmPathDest = "/vmfs/volumes/" + datastore + "/" + self.name + + ## Sanity checks + for dryRunDisk in [newDisk for newDisk in disks if ('src' in newDisk and newDisk['src'] is not None)]: + if 'copy_or_move' not in dryRunDisk['src']: + return ("'copy_or_move' parameter is mandatory when src is specified for a disk.") + if 'backing_filename' not in dryRunDisk['src']: + return ("'backing_filename' parameter is mandatory when src is specified for a disk.") + + dryRunDiskFileInfo = re.search('^\[(?P.*?)\] *(?P.*\/(?P(?P.*?)(?:--(?P.*?))?\.vmdk))$', dryRunDisk['src']['backing_filename']) + try: + self.esxiCnx.exec_command("vmkfstools -g /vmfs/volumes/" + dryRunDiskFileInfo.group('datastore') + "/" + dryRunDiskFileInfo.group('fulldiskpath')) + except IOError as e: + return "'" + dryRunDisk['src']['backing_filename'] + "' is not accessible (is the VM turned on?)\n" + str(e) + + # Create VM directory + 
self.esxiCnx.exec_command("mkdir -p " + vmPathDest) + + vmxDict = collections.OrderedDict(esxiFreeScraper.vmx_skeleton) + + diskCount = 0 + + # First apply any vmx settings from the template. + # These will be overridden by explicit configuration. + if vmTemplate: + template_name, template_moid = self.get_vm(vmTemplate, None) + if template_moid: + template_vmxPath, template_vmxDict = self.get_vmx(template_moid) + + # Generic settings + vmxDict.update({"guestos": template_vmxDict['guestos']}) + + # Hardware settings + vmxDict.update({"virtualhw.version": template_vmxDict['virtualhw.version']}) + vmxDict.update({"memsize": template_vmxDict['memsize']}) + if 'numvcpus' in template_vmxDict: + vmxDict.update({"numvcpus": template_vmxDict['numvcpus']}) + if 'cpuid.coresPerSocket' in template_vmxDict: + vmxDict.update({"cpuid.coresPerSocket": template_vmxDict['cpuid.coresPerSocket']}) + if 'vcpu.hotadd' in template_vmxDict: + vmxDict.update({"vcpu.hotadd": template_vmxDict['vcpu.hotadd']}) + if 'mem.hotadd' in template_vmxDict: + vmxDict.update({"mem.hotadd": template_vmxDict['mem.hotadd']}) + if 'sched.mem.pin' in template_vmxDict: + vmxDict.update({"sched.mem.pin": template_vmxDict['sched.mem.pin']}) + + # Network settings + netCount = 0 + while "ethernet" + str(netCount) + ".virtualdev" in template_vmxDict: + vmxDict.update({"ethernet" + str(netCount) + ".virtualdev": template_vmxDict["ethernet" + str(netCount) + ".virtualdev"]}) + vmxDict.update({"ethernet" + str(netCount) + ".networkname": template_vmxDict["ethernet" + str(netCount) + ".networkname"]}) + vmxDict.update({"ethernet" + str(netCount) + ".addresstype": "generated"}) + vmxDict.update({"ethernet" + str(netCount) + ".present": "TRUE"}) + netCount = netCount + 1 + + ### Disk cloning - clone all disks from source + response, cookies = self.soap_client.send_req('<_this type="PropertyCollector">ha-property-collectorVirtualMachinefalselayout' + str(template_moid) + 'false') + xmldom = xml.dom.minidom.parseString(response.read()) + srcDiskFiles = [data.firstChild.data for data in xmldom.getElementsByTagName("diskFile")] + + for srcDiskFile in srcDiskFiles: + srcDiskFileInfo = re.search('^\[(?P.*?)\] *(?P.*\/(?P(?P.*?)(?:--(?P.*?))?\.vmdk))$', srcDiskFile) + diskTypeKey = next((key for key, val in template_vmxDict.items() if val == srcDiskFileInfo.group('filepath')), None) + + if re.search('scsi', diskTypeKey): + controllerTypeStr = "scsi0:" + else: + controllerTypeStr = "sata0:" + + # See if vmTemplate disk exists + try: + (stdin, stdout, stderr) = self.esxiCnx.exec_command("stat /vmfs/volumes/" + srcDiskFileInfo.group('datastore') + "/" + srcDiskFileInfo.group('fulldiskpath')) + except IOError as e: + return (srcDiskFileInfo.group('fulldiskpath') + " not found!") + else: + if diskCount == 0: + disk_filename = self.name + "--boot.vmdk" + else: + if 'diskname_suffix' in srcDiskFileInfo.groupdict() and srcDiskFileInfo.group('diskname_suffix'): + disk_filename = self.name + "--" + srcDiskFileInfo.group('diskname_suffix') + ".vmdk" + else: + disk_filename = self.name + ".vmdk" + self.esxiCnx.exec_command("vmkfstools -i /vmfs/volumes/" + srcDiskFileInfo.group('datastore') + "/" + srcDiskFileInfo.group('fulldiskpath') + " -d thin " + vmPathDest + "/" + disk_filename) + + vmxDict.update({controllerTypeStr + str(diskCount) + ".devicetype": "scsi-hardDisk"}) + vmxDict.update({controllerTypeStr + str(diskCount) + ".present": "TRUE"}) + vmxDict.update({controllerTypeStr + str(diskCount) + ".filename": disk_filename}) + diskCount = diskCount + 1 + + 
else: + return (vmTemplate + " not found!") + + ## Now add remaining settings, overriding template copies. + + # Generic settings + if guest_id: + vmxDict.update({"guestos": guest_id}) + vmxDict.update({"displayname": self.name}) + vmxDict.update({"vm.createdate": time.time()}) + + if annotation: + vmxDict.update({"annotation": annotation}) + + # Hardware settings + if 'version' in hardware: + vmxDict.update({"virtualhw.version": hardware['version']}) + if 'memory_mb' in hardware: + vmxDict.update({"memsize": hardware['memory_mb']}) + if 'num_cpus' in hardware: + vmxDict.update({"numvcpus": hardware['num_cpus']}) + if 'num_cpu_cores_per_socket' in hardware: + vmxDict.update({"cpuid.coresPerSocket": hardware['num_cpu_cores_per_socket']}) + if 'hotadd_cpu' in hardware: + vmxDict.update({"vcpu.hotadd": hardware['hotadd_cpu']}) + if 'hotadd_memory' in hardware: + vmxDict.update({"mem.hotadd": hardware['hotadd_memory']}) + if 'memory_reservation_lock' in hardware: + vmxDict.update({"sched.mem.pin": hardware['memory_reservation_lock']}) + + # CDROM settings + if cdrom['type'] == 'client': + (stdin, stdout, stderr) = self.esxiCnx.exec_command("find /vmfs/devices/cdrom/ -mindepth 1 ! -type l") + cdrom_dev = stdout.read().decode('UTF-8').lstrip("\r\n").rstrip(" \r\n") + vmxDict.update({"ide0:0.devicetype": "atapi-cdrom"}) + vmxDict.update({"ide0:0.filename": cdrom_dev}) + vmxDict.update({"ide0:0.present": "TRUE"}) + elif cdrom['type'] == 'iso': + if 'iso_path' in cdrom: + vmxDict.update({"ide0:0.devicetype": "cdrom-image"}) + vmxDict.update({"ide0:0.filename": cdrom['iso_path']}) + vmxDict.update({"ide0:0.present": "TRUE"}) + vmxDict.update({"ide0:0.startconnected": "TRUE"}) + + # Network settings + cloudinit_nets = {"version": 2} + for netCount in range(0, len(networks)): + vmxDict.update({"ethernet" + str(netCount) + ".virtualdev": networks[netCount]['virtualDev']}) + vmxDict.update({"ethernet" + str(netCount) + ".networkname": networks[netCount]['networkName']}) + if "macAddress" in networks[netCount]: + vmxDict.update({"ethernet" + str(netCount) + ".addresstype": "static"}) + vmxDict.update({"ethernet" + str(netCount) + ".address": networks[netCount]['macAddress']}) + vmxDict.update({"ethernet" + str(netCount) + ".checkmacaddress": "FALSE"}) + else: + vmxDict.update({"ethernet" + str(netCount) + ".addresstype": "generated"}) + vmxDict.update({"ethernet" + str(netCount) + ".present": "TRUE"}) + if "cloudinit_netplan" in networks[netCount]: + cloudinit_nets.update(networks[netCount]['cloudinit_netplan']) + + # Add cloud-init metadata (hostname & network) + cloudinit_metadata = {"local-hostname": self.name} + if cloudinit_nets['ethernets'].keys(): + # Force guest to use the MAC address as the DHCP identifier, in case the machine-id is not reset for each clone + for cloudeth in cloudinit_nets['ethernets'].keys(): + cloudinit_nets['ethernets'][cloudeth].update({"dhcp-identifier": "mac"}) + # Add the metadata + cloudinit_metadata.update({"network": base64.b64encode(yaml.dump(cloudinit_nets, width=4096, encoding='utf-8')).decode('ascii'), "network.encoding": "base64"}) + vmxDict.update({"guestinfo.metadata": base64.b64encode(yaml.dump(cloudinit_metadata, width=4096, encoding='utf-8')).decode('ascii'), "guestinfo.metadata.encoding": "base64"}) + + # Add cloud-init userdata (must be in MIME multipart format) + if cloudinit_userdata and len(cloudinit_userdata): + import sys + from email.mime.multipart import MIMEMultipart + from email.mime.text import MIMEText + combined_message = MIMEMultipart() + 
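+ # (Descriptive note, not in the original: wrap the 'users' list in a cloud-config document and attach it as the first MIME part, since the userdata is delivered via guestinfo and must be multipart.)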
sub_message = MIMEText(yaml.dump({"users": cloudinit_userdata}, width=4096, encoding='utf-8'), "cloud-config", sys.getdefaultencoding()) + sub_message.add_header('Content-Disposition', 'attachment; filename="cloud-config.yaml"') + combined_message.attach(sub_message) + if sys.version_info >= (3, 0): + vmxDict.update({"guestinfo.userdata": base64.b64encode(combined_message.as_bytes()).decode('ascii'), "guestinfo.userdata.encoding": "base64"}) + else: + vmxDict.update({"guestinfo.userdata": base64.b64encode(combined_message.as_string()).decode('ascii'), "guestinfo.userdata.encoding": "base64"}) + + ### Disk create + # If the first disk doesn't exist, create it + bootDisks = [bootDisk for bootDisk in disks if 'boot' in bootDisk] + if len(bootDisks) > 1: + return ("Muiltiple boot disks not allowed") + + if "scsi0:0.filename" not in vmxDict: + if len(bootDisks) == 1: + disk_filename = self.name + "--boot.vmdk" + (stdin, stdout, stderr) = self.esxiCnx.exec_command("vmkfstools -c " + str(bootDisks[0]['size_gb']) + "G -d " + bootDisks[0]['type'] + " " + vmPathDest + "/" + disk_filename) + + vmxDict.update({"scsi0:0.devicetype": "scsi-hardDisk"}) + vmxDict.update({"scsi0:0.present": "TRUE"}) + vmxDict.update({"scsi0:0.filename": disk_filename}) + diskCount = diskCount + 1 + if len(bootDisks) == 0: + return ("Boot disk parameters not defined for new VM") + else: + if len(bootDisks) == 1: + return ("Boot disk parameters defined for cloned VM. Ambiguous requirement - not supported.") + + newDisks = [newDisk for newDisk in disks if 'boot' not in newDisk] + for newDiskCount,newDisk in enumerate(newDisks): + scsiDiskIdx = newDiskCount + diskCount + disk_filename = self.name + "--" + newDisk['volname'] + ".vmdk" + + #Check if new disk already exists - if so, exit + try: + (stdin, stdout, stderr) = self.esxiCnx.exec_command("stat " + vmPathDest + "/" + disk_filename) + except IOError as e: + if 'src' in newDisk and newDisk['src'] is not None: + cloneSrcBackingFile = re.search('^\[(?P.*?)\] *(?P.*\/(?P(?P.*?)(?:--(?P.*?))?\.vmdk))$', newDisk['src']['backing_filename']) + try: + (stdin, stdout, stderr) = self.esxiCnx.exec_command("stat /vmfs/volumes/" + cloneSrcBackingFile.group('datastore') + "/" + cloneSrcBackingFile.group('fulldiskpath')) + except IOError as e: + return (cloneSrcBackingFile.group('fulldiskpath') + " not found!\n" + str(e)) + else: + if newDisk['src']['copy_or_move'] == 'copy': + self.esxiCnx.exec_command("vmkfstools -i /vmfs/volumes/" + cloneSrcBackingFile.group('datastore') + "/" + cloneSrcBackingFile.group('fulldiskpath') + " -d thin " + vmPathDest + "/" + disk_filename) + else: + self.esxiCnx.exec_command("vmkfstools -E /vmfs/volumes/" + cloneSrcBackingFile.group('datastore') + "/" + cloneSrcBackingFile.group('fulldiskpath') + " " + vmPathDest + "/" + disk_filename) + + else: + (stdin, stdout, stderr) = self.esxiCnx.exec_command("vmkfstools -c " + str(newDisk['size_gb']) + "G -d " + newDisk['type'] + " " + vmPathDest + "/" + disk_filename) + + vmxDict.update({"scsi0:" + str(scsiDiskIdx) + ".devicetype": "scsi-hardDisk"}) + vmxDict.update({"scsi0:" + str(scsiDiskIdx) + ".present": "TRUE"}) + vmxDict.update({"scsi0:" + str(scsiDiskIdx) + ".filename": disk_filename}) + diskCount = diskCount + 1 + else: + return (disk_filename + " already present!") + + # write the vmx + self.put_vmx(vmxDict, vmPathDest + "/" + self.name + ".vmx") + + # Register the VM + (stdin, stdout, stderr) = self.esxiCnx.exec_command("vim-cmd solo/registervm " + vmPathDest + "/" + self.name + ".vmx") + self.moid = 
int(stdout.readlines()[0]) + + def update_vm(self, annotation=None): + if annotation: + # Update the config (annotation) in the running VM + response, cookies = self.soap_client.send_req('<_this type="VirtualMachine">' + str(self.moid) + '' + annotation + '') + waitresp = self.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data) + if waitresp != 'success': + return ("Failed to ReconfigVM_Task: %s" % waitresp) + + # Now update the disk (should not be necessary, but for some reason, sometimes the ReconfigVM_Task does not flush config to disk). + vmxPath, vmxDict = self.get_vmx(self.moid) + vmxDict.update({"annotation": annotation}) + self.put_vmx(vmxDict, vmxPath) + + # def update_vm_pyvmomi(self, annotation=None): + # if annotation: + # from pyVmomi import vim + # from pyVim.task import WaitForTask + # from pyVim import connect + # + # SI = connect.SmartConnectNoSSL(host=hostname, user=username, pwd=password, port=443) + # vm = SI.content.searchIndex.FindByDnsName(None, self.name, True) + # + # spec = vim.vm.ConfigSpec() + # spec.annotation = annotation + # task = vm.ReconfigVM_Task(spec) + # WaitForTask(task) + + # Delete the cloud-init guestinfo.metadata info from the .vmx file, otherwise it will be impossible to change the network configuration or hostname. + def delete_cloudinit(self): + vmxPath, vmxDict = self.get_vmx(self.moid) + if 'guestinfo.metadata' in vmxDict: + del vmxDict['guestinfo.metadata'] + if 'guestinfo.metadata.encoding' in vmxDict: + del vmxDict['guestinfo.metadata.encoding'] + if 'guestinfo.userdata' in vmxDict: + del vmxDict['guestinfo.userdata'] + if 'guestinfo.userdata.encoding' in vmxDict: + del vmxDict['guestinfo.userdata.encoding'] + + # write the vmx + self.put_vmx(vmxDict, vmxPath) + + +def main(): + argument_spec = { + "hostname": {"type": "str", "required": True}, + "username": {"type": "str", "required": True}, + "password": {"type": "str"}, + "name": {"type": "str"}, + "moid": {"type": "str"}, + "template": {"type": "str"}, + "state": {"type": "str", "default": 'present', "choices": ['absent', 'present', 'rebootguest', 'poweredon', 'poweredoff', 'shutdownguest']}, + "force": {"type": "bool", "default": False}, + "datastore": {"type": "str"}, + "annotation": {"type": "str", "default": ""}, + "guest_id": {"type": "str", "default": "ubuntu-64"}, + "hardware": {"type": "dict", "default": {"version": "15", "num_cpus": "2", "memory_mb": "2048", "num_cpu_cores_per_socket": "1", "hotadd_cpu": "False", "hotadd_memory": "False", "memory_reservation_lock": "False"}}, + "cloudinit_userdata": {"type": "list", "default": []}, + "disks": {"type": "list", "default": [{"boot": True, "size_gb": 16, "type": "thin"}]}, + "cdrom": {"type": "dict", "default": {"type": "client"}}, + "networks": {"type": "list", "default": [{"networkName": "VM Network", "virtualDev": "vmxnet3"}]}, + "customvalues": {"type": "list", "default": []}, + "wait": {"type": "bool", "default": True}, + "wait_timeout": {"type": "int", "default": 180} + } + + if not (len(sys.argv) > 1 and sys.argv[1] == "console"): + module = AnsibleModule(argument_spec=argument_spec, supports_check_mode=True, required_one_of=[['name', 'moid']]) + else: + # For testing without Ansible (e.g on Windows) + class cDummyAnsibleModule(): + ## Create blank VM + # params = { + # "hostname": "192.168.1.3", + # "username": "svc", + # "password": None, + # "name": "test-asdf", + # "moid": None, + # "template": None, + # "state": "present", + # "force": False, + # 
"datastore": "4tb-evo860-ssd", + # "annotation": "{'Name': 'test-asdf'}", + # "guest_id": "ubuntu-64", + # "hardware": {"version": "15", "num_cpus": "2", "memory_mb": "2048"}, + # "cloudinit_userdata": [], + # "disks": [{"boot": True, "size_gb": 16, "type": "thin"}, {"size_gb": 5, "type": "thin"}, {"size_gb": 2, "type": "thin"}], + # "cdrom": {"type": "iso", "iso_path": "/vmfs/volumes/4tb-evo860-ssd/ISOs/ubuntu-18.04.2-server-amd64.iso"}, + # "networks": [{"networkName": "VM Network", "virtualDev": "vmxnet3"}], + # "customvalues": [], + # "wait": True, + # "wait_timeout": 180, + # } + + ## Clone VM + params = { + "annotation": "{'lifecycle_state': 'current', 'Name': 'test-prod-sys-a0-1589979249', 'cluster_suffix': '1589979249', 'hosttype': 'sys', 'cluster_name': 'test-prod', 'env': 'prod', 'owner': 'dougal'}", + "cdrom": {"type": "client"}, + "cloudinit_userdata": [], + "customvalues": [], + "datastore": "4tb-evo860-ssd", + # "disks": [{"size_gb": 1, "type": "thin", "volname": "test"}], + "disks": [{"size_gb": 1, "type": "thin", "volname": "test_new"}, {"size_gb": 1, "type": "thin", "volname": "test_clone", "src": {"backing_filename": "[4tb-evo860-ssd] parsnip-dev-sys-a0-blue/parsnip-dev-sys-a0-blue--webdata.vmdk", "copy_or_move": "copy"}}], + "force": False, + "guest_id": "ubuntu-64", + "hardware": {"memory_mb": "2048", "num_cpus": "2", "version": "15"}, + "hostname": "192.168.1.3", + "moid": None, + "name": "gold-alpine-test1", + "networks": [{"cloudinit_netplan": {"ethernets": {"eth0": {"dhcp4": True}}}, "networkName": "VM Network", "virtualDev": "vmxnet3"}], + "password": sys.argv[2], + "state": "present", + "template": "gold-alpine", + "username": "svc", + "wait": True, + "wait_timeout": 180 + } + + ## Delete VM + # params = { + # "hostname": "192.168.1.3", + # "username": "svc", + # "password": None, + # "name": "test-asdf", + # "moid": None, + # "state": "absent" + # } + + def exit_json(self, changed, **kwargs): + print(changed, json.dumps(kwargs, sort_keys=True, indent=4, separators=(',', ': '))) + + def fail_json(self, msg): + print("Failed: " + msg) + exit(1) + + module = cDummyAnsibleModule() + + iScraper = esxiFreeScraper(hostname=module.params['hostname'], + username=module.params['username'], + password=module.params['password'], + name=module.params['name'], + moid=module.params['moid']) + + if iScraper.moid is None and iScraper.name is None: + module.fail_json(msg="If VM doesn't already exist, you must provide a name for it") + + # Check if the VM exists before continuing + if module.params['state'] == 'shutdownguest': + if iScraper.moid: + iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') + time_s = 60 + while time_s > 0: + (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/power.getstate " + str(iScraper.moid)) + if re.search('Powered off', stdout.read().decode('UTF-8')) is not None: + break + else: + time.sleep(1) + time_s = time_s - 1 + module.exit_json(changed=True, meta={"msg": "Shutdown " + iScraper.name + ": " + str(iScraper.moid)}) + else: + module.fail_json(msg="VM doesn't exist.") + + elif module.params['state'] == 'poweredon': + if iScraper.moid: + response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') + if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': + module.fail_json(msg="Failed to PowerOnVM_Task") + 
module.exit_json(changed=True, meta={"msg": "Powered-on " + iScraper.name + ": " + str(iScraper.moid)}) + else: + module.fail_json(msg="VM doesn't exist.") + + elif module.params['state'] == 'poweredoff': + if iScraper.moid: + response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') + if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': + module.fail_json(msg="Failed to PowerOffVM_Task") + module.exit_json(changed=True, meta={"msg": "Powered-off " + iScraper.name + ": " + str(iScraper.moid)}) + else: + module.fail_json(msg="VM doesn't exist.") + + elif module.params['state'] == 'absent': + if iScraper.moid: + # Turn off (ignoring failures), then destroy + response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') + iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) + + response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') + if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': + module.fail_json(msg="Failed to Destroy_Task") + module.exit_json(changed=True, meta={"msg": "Deleted " + iScraper.name + ": " + str(iScraper.moid)}) + else: + module.exit_json(changed=False, meta={"msg": "VM " + iScraper.name + ": already absent."}) + + elif module.params['state'] == 'rebootguest': + if iScraper.moid: + (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/power.getstate " + str(iScraper.moid)) + if re.search('Powered off', stdout.read().decode('UTF-8')) is not None: + response, cookies = iScraper.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') + if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': + module.fail_json(msg="Failed to PowerOnVM_Task") + else: + response, cookies = iScraper.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') + if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': + module.fail_json(msg="Failed to RebootGuest") + module.exit_json(changed=True, meta={"msg": "Rebooted " + iScraper.name + ": " + str(iScraper.moid)}) + else: + module.fail_json(msg="VM doesn't exist.") + + elif module.params['state'] == 'present': + exit_args = {} + # If the VM already exists, and the 'force' flag is set, then we delete it (and recreate it) + if iScraper.moid and module.params['force']: + # Turn off (ignoring failures), then destroy + response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') + iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) + + response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') + if 
iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': + module.fail_json(msg="Failed to Destroy_Task") + iScraper.moid = None + + # If the VM doesn't exist, create it. + if iScraper.moid is None: + createVmResult = iScraper.create_vm(module.params['template'], module.params['annotation'], module.params['datastore'], module.params['hardware'], module.params['guest_id'], module.params['disks'], module.params['cdrom'], module.params['customvalues'], module.params['networks'], module.params['cloudinit_userdata']) + if createVmResult != None: + module.fail_json(msg="Failed to create_vm: %s" % createVmResult) + + response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') + waitresp = iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) + if waitresp != 'success': + module.fail_json(msg="Failed to PowerOnVM_Task: %s" % waitresp) + + isChanged = True + + ## Delete the cloud-init config + iScraper.delete_cloudinit() + + if "wait" in module.params and module.params['wait']: + time_s = int(module.params['wait_timeout']) + while time_s > 0: + (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/get.guest " + str(iScraper.moid)) + guest_info = stdout.read().decode('UTF-8') + vm_params = re.search('\s*hostName\s*=\s*\"?(?P.*?)\"?,.*\n\s*ipAddress\s*=\s*\"?(?P.*?)\"?,.*', guest_info) + if vm_params and vm_params.group('vm_ip') != "" and vm_params.group('vm_hostname') != "": + break + else: + time.sleep(1) + time_s = time_s - 1 + + module.exit_json(changed=isChanged, + guest_info=guest_info, + hostname=vm_params.group('vm_hostname'), + ip_address=vm_params.group('vm_ip'), + name=module.params['name'], + moid=iScraper.moid) + else: + module.exit_json(changed=isChanged, + hostname="", + ip_address="", + name=module.params['name'], + moid=iScraper.moid) + + else: + updateVmResult = iScraper.update_vm(annotation=module.params['annotation']) + if updateVmResult != None: + module.fail_json(msg=updateVmResult) + + module.exit_json(changed=True, name=module.params['name'], moid=iScraper.moid) + + else: + module.exit_json(changed=False, meta={"msg": "No state."}) + + +if __name__ == '__main__': + main() diff --git a/_dependencies/library/esxifree_guest_LICENSE b/_dependencies/library/esxifree_guest_LICENSE new file mode 100644 index 00000000..3c642ec5 --- /dev/null +++ b/_dependencies/library/esxifree_guest_LICENSE @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2020, Dougal Seeley +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/_dependencies/library/esxifree_guest_README.md b/_dependencies/library/esxifree_guest_README.md new file mode 100644 index 00000000..9cdceaa8 --- /dev/null +++ b/_dependencies/library/esxifree_guest_README.md @@ -0,0 +1,53 @@ +# esxifree_guest +https://github.com/dseeley/esxifree_guest + +This module can be used to create new ESXi virtual machines, including cloning from templates or other virtual machines. + +It does so using direct SOAP calls and Paramiko SSH to the host - without using the vSphere API - meaning it can be used on the free hypervisor. + +## Configuration +Your ESXi host needs some config: ++ Enable SSH + + Inside the web UI, navigate to “Manage”, then the “Services” tab. Find the entry called: “TSM-SSH”, and enable it. ++ Enable “Guest IP Hack” + + `esxcli system settings advanced set -o /Net/GuestIPHack -i 1` ++ Open VNC Ports on the Firewall + ``` + Packer connects to the VM using VNC, so we’ll open a range of ports to allow it to connect to it. + + First, ensure we can edit the firewall configuration: + + chmod 644 /etc/vmware/firewall/service.xml + chmod +t /etc/vmware/firewall/service.xml + Then append the range we want to open to the end of the file: + + + packer-vnc + + inbound + tcp + dst + + 5900 + 6000 + + + true + true + + Finally, restore the permissions and reload the firewall: + + chmod 444 /etc/vmware/firewall/service.xml + esxcli network firewall refresh + ``` + +## Requirements ++ python 3 ++ paramiko ++ Any base-images from which clones are to be made must have cloud-init and [`cloud-init-vmware-guestinfo`](https://github.com/vmware/cloud-init-vmware-guestinfo) installed + +## Execution +This can be run as an Ansible module (see inline documentation), or from the console: +```bash +python3 ./esxifree_guest.py console +``` \ No newline at end of file From 9d9e3508050b2ff230e787d46030e64d871df85c Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Sat, 12 Sep 2020 15:34:01 +0100 Subject: [PATCH 03/58] Replaced the cli_facts.py as a vars_plugin rather than a callback_plugin, to ensure it works properly with include_role. 
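
For illustration only (not part of this patch): with the plugin on the vars_plugins path (as under `_dependencies/`), any play or role — including roles pulled in dynamically via `include_role` — should be able to read the two facts returned by its `get_vars()`:

```yaml
- name: Show the CLI context exposed by the cli_facts vars plugin
  debug:
    msg: "argv={{ argv }} cliargs={{ cliargs }}"
```
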
--- _dependencies/callback_plugins/cli_facts.py | 58 --------------------- _dependencies/vars_plugins/cli_facts.py | 30 +++++++++++ 2 files changed, 30 insertions(+), 58 deletions(-) delete mode 100644 _dependencies/callback_plugins/cli_facts.py create mode 100644 _dependencies/vars_plugins/cli_facts.py diff --git a/_dependencies/callback_plugins/cli_facts.py b/_dependencies/callback_plugins/cli_facts.py deleted file mode 100644 index 6620ca80..00000000 --- a/_dependencies/callback_plugins/cli_facts.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/python -# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) - -from __future__ import absolute_import, division, print_function - -__metaclass__ = type - -ANSIBLE_METADATA = {'metadata_version': '1.2', - 'status': ['preview'], - 'supported_by': 'community'} - -DOCUMENTATION = ''' ---- -module: cli_facts -short_description: Expose the system ARGV and CLI arguments as facts in plays. -version_added: "2.8" -author: "Dougal Seeley" -description: - - Expose the system ARGV and CLI arguments as facts in plays. Two new facts are added: argv and cliargs. -options: -requirements: -''' - -from ansible.plugins.callback import CallbackBase -from ansible.context import CLIARGS -from ansible.cli import CLI - -import sys - -HAS_MODULES = True - - -class CallbackModule(CallbackBase): - CALLBACK_VERSION = 2.0 - CALLBACK_TYPE = 'aggregate' - CALLBACK_NAME = 'cli_facts' - CALLBACK_NEEDS_WHITELIST = False - - def __init__(self, *args, **kwargs): - super(CallbackModule, self).__init__(*args, **kwargs) - self._cliargs = CLIARGS - self._argv = sys.argv - - def set_options(self, task_keys=None, var_options=None, direct=None): - super(CallbackModule, self).set_options(task_keys=task_keys, var_options=var_options, direct=direct) - - def v2_playbook_on_play_start(self, play): - variable_manager = play.get_variable_manager() - - # We cannot put 'localhost' in get_hosts(pattern=['all', 'localhost']) call, because of PR 58400, described below. - hosts = variable_manager._inventory.get_hosts(pattern=['all'], ignore_restrictions=True) - if variable_manager._inventory.localhost: hosts.append(variable_manager._inventory.localhost) - for host in hosts: - # Ansible 2.9 (https://github.com/ansible/ansible/pull/58400) changed the 'host' type in ansible/vars/manager.py::set_host_variable() from type to type string. - if CLI.version_info()['major'] >= 2 and CLI.version_info()['minor'] >= 9: - host = str(host) - variable_manager.set_host_variable(host, "cliargs", dict(self._cliargs)) - variable_manager.set_host_variable(host, "argv", self._argv) diff --git a/_dependencies/vars_plugins/cli_facts.py b/_dependencies/vars_plugins/cli_facts.py new file mode 100644 index 00000000..5cd07ccf --- /dev/null +++ b/_dependencies/vars_plugins/cli_facts.py @@ -0,0 +1,30 @@ +from __future__ import absolute_import, division, print_function +__metaclass__ = type + +ANSIBLE_METADATA = {'metadata_version': '1.2', + 'status': ['preview'], + 'supported_by': 'community'} + +DOCUMENTATION = ''' +--- +cars: cli_facts +short_description: Expose the system ARGV and CLI arguments as facts in plays. +version_added: "2.8" +author: "Dougal Seeley" +description: + - Expose the system ARGV and CLI arguments as facts in plays. Two new facts are added: argv and cliargs. 
+options: +requirements: +''' + +from ansible.plugins.vars import BaseVarsPlugin +from ansible.context import CLIARGS +import sys + + +class VarsModule(BaseVarsPlugin): + REQUIRES_WHITELIST = False + + def get_vars(self, loader, path, entities, cache=True): + super(VarsModule, self).get_vars(loader, path, entities) + return {"cliargs": dict(CLIARGS), "argv": sys.argv} From a648847c074a6e9c914d8ce634568d60eb8fbca2 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Fri, 18 Sep 2020 09:40:09 +0100 Subject: [PATCH 04/58] Update EXAMPLE/ to reflect new ability to dynamically load clusterverse via galaxy. --- EXAMPLE/cluster.yml | 24 +++++++++++++++++------- EXAMPLE/redeploy.yml | 12 +++++++++++- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/EXAMPLE/cluster.yml b/EXAMPLE/cluster.yml index 94af2e14..6fdab2b0 100644 --- a/EXAMPLE/cluster.yml +++ b/EXAMPLE/cluster.yml @@ -1,27 +1,37 @@ --- +- name: Download required roles + hosts: localhost:all + connection: local + tasks: + - name: "ansible-galaxy install" + local_action: command ansible-galaxy install -r requirements.yml + delegate_to: localhost + run_once: true + tags: [always] + - name: Deploy the cluster hosts: localhost connection: local tasks: - - { import_role: { name: clusterverse/clean }, tags: [clusterverse_clean], when: clean is defined } # Alternative include_role (need to force the tags): - { include_role: { name: clusterverse/clean, apply: {tags: [clusterverse_clean]}}, tags: [clusterverse_clean], when: clean is defined } - - { import_role: { name: clusterverse/create }, tags: [clusterverse_create] } - - { import_role: { name: clusterverse/dynamic_inventory }, tags: [clusterverse_dynamic_inventory] } + - { include_role: { name: "clusterverse/clean", apply: {tags: &roletag_clean ["clusterverse_clean"]} }, tags: *roletag_clean, when: "clean is defined" } + - { include_role: { name: "clusterverse/create", apply: {tags: &roletag_create ["clusterverse_create"]} }, tags: *roletag_create } + - { include_role: { name: "clusterverse/dynamic_inventory", apply: {tags: &roletag_dynamic_inventory ["clusterverse_dynamic_inventory"]} }, tags: *roletag_dynamic_inventory } - name: Configure the cluster hosts: all tasks: - - { import_role: { name: clusterverse/config }, tags: [clusterverse_config] } + - { include_role: { name: "clusterverse/config", apply: {tags: &roletag_config ["clusterverse_config"]} }, tags: *roletag_config } ## Application roles -- name: Test application role +- name: Application roles hosts: all tasks: - - { import_role: { name: "testrole" }, tags: [testrole] } + - { include_role: { name: "testrole", apply: {tags: &roletag_testrole ["testrole"]} }, tags: *roletag_testrole } ## - name: Perform cluster readiness operations hosts: localhost connection: local tasks: - - { import_role: { name: clusterverse/readiness }, tags: [clusterverse_readiness] } + - { include_role: { name: "clusterverse/readiness", apply: {tags: &roletag_readiness ["clusterverse_readiness"]} }, tags: *roletag_readiness } diff --git a/EXAMPLE/redeploy.yml b/EXAMPLE/redeploy.yml index 8c033ca8..d53f27d2 100644 --- a/EXAMPLE/redeploy.yml +++ b/EXAMPLE/redeploy.yml @@ -1,11 +1,21 @@ --- +- name: Download required roles + hosts: localhost:all + connection: local + tasks: + - name: "ansible-galaxy install" + local_action: command ansible-galaxy install -r requirements.yml + delegate_to: localhost + run_once: true + tags: [always] + - name: Redeploy hosts: localhost connection: local tasks: - name: Run redeploy - import_role: + include_role: name: 
clusterverse/redeploy vars: mainclusteryml: "cluster.yml" From 67beeadfa29b6e38342697542b2a8c197707385f Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Fri, 18 Sep 2020 13:46:12 +0100 Subject: [PATCH 05/58] + Updates to allow clusterverse to be dynamically acquired via ansible-galaxy as part of the playbook. + Change the EXAMPLE/cluster.yml and redeploy.yml to use include_roles (dynamic load, because clusterverse won't exist yet) + Change cli_facts.py plugin from a callback to a vars plugin (callback_plugins are not evaluated in include_role) + Remove dependency on ansible facts for epoch time. + Simplify the package upgrade logic to use new 'reboot:' action + Make the inclusion of 'regionzone' in the inventory dependent on it existing for that cloud type. + Add short delay to allow bind9 zone transfers to complete --- EXAMPLE/README.md | 6 +- EXAMPLE/cluster.yml | 22 +++-- EXAMPLE/group_vars/_skel/cluster_vars.yml | 5 +- .../group_vars/test_aws_euw1/cluster_vars.yml | 9 +- .../group_vars/test_gcp_euw1/cluster_vars.yml | 9 +- EXAMPLE/redeploy.yml | 6 +- _dependencies/callback_plugins/cli_facts.py | 58 ------------ _dependencies/filter_plugins/custom.py | 9 +- _dependencies/vars_plugins/cli_facts.py | 30 ++++++ .../tasks/get_cluster_hosts_target.yml | 2 +- cluster_hosts/tasks/main.yml | 4 +- config/tasks/create_dns_a.yml | 93 ++++++++++--------- config/tasks/main.yml | 4 +- config/tasks/pkgupdate.yml | 16 +--- create/tasks/main.yml | 9 +- dynamic_inventory/tasks/main.yml | 16 ++-- redeploy/tasks/main.yml | 1 + 17 files changed, 133 insertions(+), 166 deletions(-) delete mode 100644 _dependencies/callback_plugins/cli_facts.py create mode 100644 _dependencies/vars_plugins/cli_facts.py diff --git a/EXAMPLE/README.md b/EXAMPLE/README.md index 986bcf27..d20fc2f9 100644 --- a/EXAMPLE/README.md +++ b/EXAMPLE/README.md @@ -67,11 +67,13 @@ ansible-playbook -u --private-key=/home//.ssh/ cluster + `-e app_class=` - Normally defined in `group_vars//cluster_vars.yml`. The class of application (e.g. 'database', 'webserver'); becomes part of the fqdn + `-e release_version=` - Identifies the application version that is being deployed. + `-e clean=[current|retiring|redeployfail|_all_]` - Deletes VMs in `lifecycle_state`, or `_all_`, as well as networking and security groups -+ `-e do_package_upgrade=true` - Upgrade the OS packages (not good for determinism) ++ `-e pkgupdate=[always|onCreate]` - Upgrade the OS packages (not good for determinism). `onCreate` only upgrades when creating the VM for the first time. + `-e reboot_on_package_upgrade=true` - After updating packages, performs a reboot on all nodes. + `-e prometheus_node_exporter_install=false` - Does not install the prometheus node_exporter + `-e static_journal=true` - Creates /var/log/journal directory, which will keep a permanent record of journald logs in systemd machines (normally ephemeral) + `-e filebeat_install=false` - Does not install filebeat ++ `-e metricbeat_install=false` - Does not install metricbeat ++ `-e wait_for_dns=false` - Does not wait for DNS resolution + `-e create_gcp_network=true` - Create GCP network and subnetwork (probably needed if creating from scratch and using public network) ### Tags @@ -83,7 +85,7 @@ ansible-playbook -u --private-key=/home//.ssh/ cluster --- ## Invocation examples: _redeploy_ -The `redeploy.yml` sub-role will completely redeploy the cluster; this is useful for example to upgrade the underlying operating system version. 
+The `redeploy.yml` sub-role will completely redeploy the cluster; this is useful for example to upgrade the underlying operating system version, or changing the disk sizes. ### AWS: ``` diff --git a/EXAMPLE/cluster.yml b/EXAMPLE/cluster.yml index 701d7c7c..031ca90d 100644 --- a/EXAMPLE/cluster.yml +++ b/EXAMPLE/cluster.yml @@ -3,22 +3,28 @@ - name: Deploy the cluster hosts: localhost connection: local - roles: - - { role: clusterverse/clean, tags: [clusterverse_clean], when: clean is defined } - - { role: clusterverse/create, tags: [clusterverse_create] } - - { role: clusterverse/dynamic_inventory, tags: [clusterverse_dynamic_inventory] } + gather_facts: no + tasks: + - { name: "Get dependent roles via ansible-galaxy", local_action: "command ansible-galaxy install -r requirements.yml", tags: ["always"] } + + - { include_role: { name: "clusterverse/clean", apply: {tags: &roletag_clean ["clusterverse_clean"]} }, tags: *roletag_clean, when: "clean is defined" } + - { include_role: { name: "clusterverse/create", apply: {tags: &roletag_create ["clusterverse_create"]} }, tags: *roletag_create } + - { include_role: { name: "clusterverse/dynamic_inventory", apply: {tags: &roletag_dynamic_inventory ["clusterverse_dynamic_inventory"]} }, tags: *roletag_dynamic_inventory } - name: Configure the cluster hosts: all - roles: [ { role: clusterverse/config, tags: [clusterverse_config] } ] + tasks: + - { include_role: { name: "clusterverse/config", apply: {tags: &roletag_config ["clusterverse_config"]} }, tags: *roletag_config } ## Application roles -- name: Test application role +- name: Application roles hosts: all - roles: [ { role: testrole, tags: [testrole] } ] + tasks: + - { include_role: { name: "testrole", apply: {tags: &roletag_testrole ["testrole"]} }, tags: *roletag_testrole } ## - name: Perform cluster readiness operations hosts: localhost connection: local - roles: [ { role: clusterverse/readiness, tags: [clusterverse_readiness] } ] + tasks: + - { include_role: { name: "clusterverse/readiness", apply: {tags: &roletag_readiness ["clusterverse_readiness"]} }, tags: *roletag_readiness } diff --git a/EXAMPLE/group_vars/_skel/cluster_vars.yml b/EXAMPLE/group_vars/_skel/cluster_vars.yml index 1aec269f..c6ebfbde 100644 --- a/EXAMPLE/group_vars/_skel/cluster_vars.yml +++ b/EXAMPLE/group_vars/_skel/cluster_vars.yml @@ -35,10 +35,7 @@ cloud_agent: ## Bind configuration and credentials, per environment bind9: - sandbox: - server: - key_name: - key_secret: + sandbox: {server: "", key_name: "", key_secret: ""} cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within the cloud environment diff --git a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml index e37fda24..a396a0bc 100644 --- a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml @@ -1,8 +1,8 @@ --- -redeploy_scheme: _scheme_addallnew_rmdisk_rollback +#redeploy_scheme: _scheme_addallnew_rmdisk_rollback #redeploy_scheme: _scheme_addnewvm_rmdisk_rollback -#redeploy_scheme: _scheme_rmvm_rmdisks_only +#redeploy_scheme: _scheme_rmvm_rmdisk_only app_name: "test" # The name of the application cluster (e.g. 'couchbase', 'nginx'); becomes part of cluster_name. app_class: "test" # The class of application (e.g. 
'database', 'webserver'); becomes part of the fqdn @@ -35,10 +35,7 @@ cloud_agent: ## Bind configuration and credentials, per environment bind9: - sandbox: - server: - key_name: - key_secret: + sandbox: {server: "", key_name: "", key_secret: ""} cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within the cloud environment diff --git a/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml index c5a01de0..02631539 100644 --- a/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml @@ -4,9 +4,9 @@ gcp_credentials_file: "{{ lookup('env','GCP_CREDENTIALS') | default('/dev/null', true) }}" gcp_credentials_json: "{{ lookup('file', gcp_credentials_file) | default({'project_id': 'GCP_CREDENTIALS__NOT_SET','client_email': 'GCP_CREDENTIALS__NOT_SET'}, true) }}" -redeploy_scheme: _scheme_addallnew_rmdisk_rollback +#redeploy_scheme: _scheme_addallnew_rmdisk_rollback #redeploy_scheme: _scheme_addnewvm_rmdisk_rollback -#redeploy_scheme: _scheme_rmvm_rmdisks_only +#redeploy_scheme: _scheme_rmvm_rmdisk_only app_name: "test" # The name of the application cluster (e.g. 'couchbase', 'nginx'); becomes part of cluster_name. app_class: "test" # The class of application (e.g. 'database', 'webserver'); becomes part of the fqdn @@ -39,10 +39,7 @@ cloud_agent: ## Bind configuration and credentials, per environment bind9: - sandbox: - server: - key_name: - key_secret: + sandbox: {server: "", key_name: "", key_secret: ""} cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within the cloud environment diff --git a/EXAMPLE/redeploy.yml b/EXAMPLE/redeploy.yml index 8c033ca8..345c9ec2 100644 --- a/EXAMPLE/redeploy.yml +++ b/EXAMPLE/redeploy.yml @@ -4,8 +4,12 @@ hosts: localhost connection: local tasks: + - name: "Get dependent roles via ansible-galaxy" + local_action: "command ansible-galaxy install -r requirements.yml" + tags: ["always"] + - name: Run redeploy - import_role: + include_role: name: clusterverse/redeploy vars: mainclusteryml: "cluster.yml" diff --git a/_dependencies/callback_plugins/cli_facts.py b/_dependencies/callback_plugins/cli_facts.py deleted file mode 100644 index 6620ca80..00000000 --- a/_dependencies/callback_plugins/cli_facts.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/python -# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) - -from __future__ import absolute_import, division, print_function - -__metaclass__ = type - -ANSIBLE_METADATA = {'metadata_version': '1.2', - 'status': ['preview'], - 'supported_by': 'community'} - -DOCUMENTATION = ''' ---- -module: cli_facts -short_description: Expose the system ARGV and CLI arguments as facts in plays. -version_added: "2.8" -author: "Dougal Seeley" -description: - - Expose the system ARGV and CLI arguments as facts in plays. Two new facts are added: argv and cliargs. 
-options: -requirements: -''' - -from ansible.plugins.callback import CallbackBase -from ansible.context import CLIARGS -from ansible.cli import CLI - -import sys - -HAS_MODULES = True - - -class CallbackModule(CallbackBase): - CALLBACK_VERSION = 2.0 - CALLBACK_TYPE = 'aggregate' - CALLBACK_NAME = 'cli_facts' - CALLBACK_NEEDS_WHITELIST = False - - def __init__(self, *args, **kwargs): - super(CallbackModule, self).__init__(*args, **kwargs) - self._cliargs = CLIARGS - self._argv = sys.argv - - def set_options(self, task_keys=None, var_options=None, direct=None): - super(CallbackModule, self).set_options(task_keys=task_keys, var_options=var_options, direct=direct) - - def v2_playbook_on_play_start(self, play): - variable_manager = play.get_variable_manager() - - # We cannot put 'localhost' in get_hosts(pattern=['all', 'localhost']) call, because of PR 58400, described below. - hosts = variable_manager._inventory.get_hosts(pattern=['all'], ignore_restrictions=True) - if variable_manager._inventory.localhost: hosts.append(variable_manager._inventory.localhost) - for host in hosts: - # Ansible 2.9 (https://github.com/ansible/ansible/pull/58400) changed the 'host' type in ansible/vars/manager.py::set_host_variable() from type to type string. - if CLI.version_info()['major'] >= 2 and CLI.version_info()['minor'] >= 9: - host = str(host) - variable_manager.set_host_variable(host, "cliargs", dict(self._cliargs)) - variable_manager.set_host_variable(host, "argv", self._argv) diff --git a/_dependencies/filter_plugins/custom.py b/_dependencies/filter_plugins/custom.py index 1b27c7ba..0796adaa 100644 --- a/_dependencies/filter_plugins/custom.py +++ b/_dependencies/filter_plugins/custom.py @@ -41,14 +41,18 @@ def iplookup(fqdn): def json_loads_loose(inStr): import re, json - display.vv(u"json_loads_loose - input type: %s" % type(inStr)) + display.vvv(u"json_loads_loose - input type: %s" % type(inStr)) if type(inStr) is dict or type(inStr) is list: json_object = json.loads((str(json.dumps(inStr))).encode('utf-8')) else: try: json_object = json.loads(inStr) except (ValueError, AttributeError) as e: - return json.loads(str(re.sub(r'\'(.*?)\'([,:}])', r'"\1"\2', inStr).replace(': True', ': "True"').replace(': False', ': "False"')).encode('utf-8')) + try: + json_object = json.loads(str(re.sub(r'\'(.*?)\'([,:}])', r'"\1"\2', inStr).replace(': True', ': "True"').replace(': False', ': "False"')).encode('utf-8')) + except (ValueError, AttributeError) as e: + display.v(u"json_loads_loose - WARNING: could not parse attribute string as json: %s" % inStr) + return inStr return json_object @@ -57,6 +61,5 @@ def filters(self): return { 'dict_agg': dict_agg, 'iplookup': iplookup, - 'json_loads_loose': json_loads_loose } diff --git a/_dependencies/vars_plugins/cli_facts.py b/_dependencies/vars_plugins/cli_facts.py new file mode 100644 index 00000000..5cd07ccf --- /dev/null +++ b/_dependencies/vars_plugins/cli_facts.py @@ -0,0 +1,30 @@ +from __future__ import absolute_import, division, print_function +__metaclass__ = type + +ANSIBLE_METADATA = {'metadata_version': '1.2', + 'status': ['preview'], + 'supported_by': 'community'} + +DOCUMENTATION = ''' +--- +cars: cli_facts +short_description: Expose the system ARGV and CLI arguments as facts in plays. +version_added: "2.8" +author: "Dougal Seeley" +description: + - Expose the system ARGV and CLI arguments as facts in plays. Two new facts are added: argv and cliargs. 
+options: +requirements: +''' + +from ansible.plugins.vars import BaseVarsPlugin +from ansible.context import CLIARGS +import sys + + +class VarsModule(BaseVarsPlugin): + REQUIRES_WHITELIST = False + + def get_vars(self, loader, path, entities, cache=True): + super(VarsModule, self).get_vars(loader, path, entities) + return {"cliargs": dict(CLIARGS), "argv": sys.argv} diff --git a/cluster_hosts/tasks/get_cluster_hosts_target.yml b/cluster_hosts/tasks/get_cluster_hosts_target.yml index cb113500..3eda6a80 100644 --- a/cluster_hosts/tasks/get_cluster_hosts_target.yml +++ b/cluster_hosts/tasks/get_cluster_hosts_target.yml @@ -101,7 +101,7 @@ when: cluster_vars.type == "aws" -- name: get_cluster_hosts_target/gcp | GCP-specific modifications to cluster_hosts_target - add rootvol size +- name: get_cluster_hosts_target/gcp | GCP-specific modifications to cluster_hosts_target block: - name: get_cluster_hosts_target/gcp | Update cluster_hosts_target with rootvol_size set_fact: diff --git a/cluster_hosts/tasks/main.yml b/cluster_hosts/tasks/main.yml index e631745b..1e5c95fe 100644 --- a/cluster_hosts/tasks/main.yml +++ b/cluster_hosts/tasks/main.yml @@ -20,9 +20,9 @@ cluster_suffix: "{{cluster_suffixes_current[0]}}" when: cluster_suffixes_current | unique | length==1 - - name: Create new cluster_suffix + - name: Create new cluster_suffix (epoch time) set_fact: - cluster_suffix: "{{ansible_date_time.epoch}}" + cluster_suffix: "{{ lookup('pipe', 'date +%s') }}" when: cluster_suffixes_current | unique | length==0 - debug: msg="cluster_suffix = {{cluster_suffix}}" diff --git a/config/tasks/create_dns_a.yml b/config/tasks/create_dns_a.yml index 2dd42c92..01733673 100644 --- a/config/tasks/create_dns_a.yml +++ b/config/tasks/create_dns_a.yml @@ -1,52 +1,57 @@ --- -- name: config/dns/a/nsupdate | create/update A records in bind (nsupdate) - nsupdate: - key_name: "{{bind9[buildenv].key_name}}" - key_secret: "{{bind9[buildenv].key_secret}}" - server: "{{bind9[buildenv].server}}" - ttl: 60 - zone: "{{cluster_vars.dns_nameserver_zone}}" - record: "{{item.hostname}}.{{cluster_vars.dns_user_domain | regex_replace('^(.*?)\\.' + cluster_vars.dns_nameserver_zone, '\\1')}}" - value: "{{ hostvars[item.hostname]['ansible_host'] }}" - become: false - delegate_to: localhost - run_once: true - with_items: "{{ cluster_hosts_target }}" +- block: + - name: config/dns/a/nsupdate | create/update A records in bind (nsupdate) + nsupdate: + key_name: "{{bind9[buildenv].key_name}}" + key_secret: "{{bind9[buildenv].key_secret}}" + server: "{{bind9[buildenv].server}}" + ttl: 60 + zone: "{{cluster_vars.dns_nameserver_zone}}" + record: "{{item.hostname}}.{{cluster_vars.dns_user_domain | regex_replace('^(.*?)\\.' 
+ cluster_vars.dns_nameserver_zone, '\\1')}}" + value: "{{ hostvars[item.hostname]['ansible_host'] }}" + become: false + delegate_to: localhost + run_once: true + with_items: "{{ cluster_hosts_target }}" + + - name: config/dns/a/nsupdate | Wait for a short delay to allow zone transfers to complete (help prevent negative cache) + pause: + seconds: 10 when: cluster_vars.dns_server == "nsupdate" -- name: config/dns/a/route53 | create/update A records in AWS (route53) - route53: - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - state: present - zone: "{{cluster_vars.dns_nameserver_zone}}" - record: "{{item.hostname}}.{{cluster_vars.dns_user_domain}}" - type: A - ttl: 60 - value: "{{ hostvars[item.hostname]['ansible_host'] }}" - private_zone: "{{cluster_vars.route53_private_zone | default(true)}}" - overwrite: true - wait: yes - become: false - delegate_to: localhost - run_once: true - with_items: "{{ cluster_hosts_target }}" - when: cluster_vars.dns_server=="route53" - async: 7200 - poll: 0 - register: route53_records +- block: + - name: config/dns/a/route53 | create/update A records in AWS (route53) + route53: + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + state: present + zone: "{{cluster_vars.dns_nameserver_zone}}" + record: "{{item.hostname}}.{{cluster_vars.dns_user_domain}}" + type: A + ttl: 60 + value: "{{ hostvars[item.hostname]['ansible_host'] }}" + private_zone: "{{cluster_vars.route53_private_zone | default(true)}}" + overwrite: true + wait: yes + become: false + delegate_to: localhost + run_once: true + with_items: "{{ cluster_hosts_target }}" + async: 7200 + poll: 0 + register: r__route53 -- name: config/dns/a/route53 | Wait for records to be replicated to all Amazon Route 53 DNS servers - async_status: - jid: "{{ item.ansible_job_id }}" - register: route53_jobs - until: route53_jobs.finished - delay: 1 - retries: 300 - run_once: true - with_items: "{{route53_records.results}}" - delegate_to: localhost + - name: config/dns/a/route53 | Wait for records to be replicated to all Amazon Route 53 DNS servers + async_status: + jid: "{{ item.ansible_job_id }}" + register: route53_jobs + until: route53_jobs.finished + delay: 1 + retries: 300 + run_once: true + with_items: "{{r__route53.results}}" + delegate_to: localhost when: cluster_vars.dns_server=="route53" - name: config/dns/a/clouddns | create/update A records in GCP (clouddns) diff --git a/config/tasks/main.yml b/config/tasks/main.yml index 7de693b4..e842014a 100644 --- a/config/tasks/main.yml +++ b/config/tasks/main.yml @@ -80,9 +80,9 @@ include_tasks: cloud_agents.yml when: (cloud_agent is defined and cloud_agent) -- name: Update packages (when do_package_upgrade is defined) +- name: Update packages (when pkgupdate is defined) include_tasks: pkgupdate.yml - when: do_package_upgrade is defined and do_package_upgrade|bool + when: pkgupdate is defined and (pkgupdate == 'always' or (pkgupdate == 'onCreate' and inventory_hostname in (hostvars['localhost'].cluster_hosts_created | json_query('[].hostname')))) - name: Set hostname (e.g. 
AWS doesn't set it automatically) become: true diff --git a/config/tasks/pkgupdate.yml b/config/tasks/pkgupdate.yml index aa5600e8..e00ea510 100644 --- a/config/tasks/pkgupdate.yml +++ b/config/tasks/pkgupdate.yml @@ -7,6 +7,7 @@ - name: install aptitude (needed for the Debian upgrade) become: true apt: + update_cache: yes name: aptitude - name: run apt upgrade become: true @@ -24,17 +25,8 @@ register: yum_packages_update when: ansible_os_family == 'RedHat' - - block: - - name: reboot and wait (if reboot_on_package_upgrade==true) - become: yes - shell: sleep 2 && /sbin/shutdown -r now "Ansible system package upgraded" && sleep 5 - async: 1 - poll: 0 - - - set_fact: wait_host="{{ ansible_host }}" - - - name: Wait for server to come back - local_action: wait_for host={{wait_host}} delay=10 port=22 state=started - become: false + - name: reboot and wait (if reboot_on_package_upgrade==true) + become: yes + reboot: when: (reboot_on_package_upgrade is defined and reboot_on_package_upgrade|bool) and (apt_packages_update.changed or yum_packages_update.changed) when: pkgupdate is defined and (pkgupdate == 'always' or (pkgupdate == 'onCreate' and inventory_hostname in (hostvars['localhost'].cluster_hosts_created | json_query('[].hostname')))) diff --git a/create/tasks/main.yml b/create/tasks/main.yml index ed828dbe..d6e95706 100644 --- a/create/tasks/main.yml +++ b/create/tasks/main.yml @@ -21,10 +21,5 @@ current_release_versions: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current' && tagslabels.release].tagslabels.release\") | default([]) }}" -- name: Create AWS cluster - include_tasks: aws.yml - when: cluster_vars.type == "aws" - -- name: Create GCP cluster - include_tasks: gcp.yml - when: cluster_vars.type == "gcp" +- name: "Create {{cluster_vars.type}} cluster" + include_tasks: "{{cluster_vars.type}}.yml" diff --git a/dynamic_inventory/tasks/main.yml b/dynamic_inventory/tasks/main.yml index 98def030..4c041104 100644 --- a/dynamic_inventory/tasks/main.yml +++ b/dynamic_inventory/tasks/main.yml @@ -1,12 +1,7 @@ --- -- name: dynamic_inventory | Derive dynamic inventory for AWS cluster - include_tasks: aws.yml - when: cluster_vars.type == "aws" - -- name: dynamic_inventory | Derive dynamic inventory for GCP cluster - include_tasks: gcp.yml - when: cluster_vars.type == "gcp" +- name: "dynamic_inventory | Derive dynamic inventory for {{cluster_vars.type}} cluster" + include_tasks: "{{cluster_vars.type}}.yml" - assert: { that: "dynamic_inventory_flat is defined", msg: "dynamic_inventory_flat is not defined" } @@ -27,10 +22,10 @@ - name: dynamic_inventory | Add hosts to dynamic inventory add_host: name: "{{ item.hostname }}" - groups: ["{{ item.hosttype }}","{{ cluster_name }}","{{ clusterid }}","{{ item.regionzone }}"] + groups: "{{ item.hosttype }},{{ cluster_name }},{{ clusterid }}{%- if 'regionzone' in item -%},{{ item.regionzone }}{%- endif -%}" ansible_host: "{{ item.inventory_ip }}" hosttype: "{{ item.hosttype }}" - regionzone: "{{ item.regionzone }}" + regionzone: "{{ item.regionzone | default(omit) }}" with_items: "{{ dynamic_inventory_flat }}" - name: dynamic_inventory | stat the inventory_file path @@ -45,7 +40,7 @@ {% if groupname not in ["all", "ungrouped"] -%} [{{ groupname }}] {% for hostname in groups[groupname] %} - {{ hostname }} ansible_host={{hostvars[hostname].ansible_host}} hosttype={{ hostvars[hostname].hosttype }} regionzone={{ hostvars[hostname].regionzone }} + {{ hostname }} ansible_host={{hostvars[hostname].ansible_host}} hosttype={{ 
hostvars[hostname].hosttype }} {%- if 'regionzone' in hostvars[hostname] -%}regionzone={{ hostvars[hostname].regionzone }}{%- endif -%} {% endfor %} {% endif %} @@ -57,3 +52,4 @@ - name: dynamic_inventory | current inventory_hostnames debug: msg="{{ lookup('inventory_hostnames','all').split(',') }}" + when: lookup('inventory_hostnames','all') | length diff --git a/redeploy/tasks/main.yml b/redeploy/tasks/main.yml index 90f3b9ed..bb4f62ee 100644 --- a/redeploy/tasks/main.yml +++ b/redeploy/tasks/main.yml @@ -5,6 +5,7 @@ - assert: { that: "clean is not defined", msg: "Must not set the 'clean' variable for a redeploy" } - assert: { that: "canary is defined and (canary is defined and canary in ['start', 'finish', 'none', 'tidy', 'revert'])", msg: "Canary must be 'start', 'finish', 'none', 'tidy' or 'revert'" } - assert: { that: "redeploy_scheme is defined" } + - assert: { that: "cluster_hosts_state | length", msg: "Redeploy only possible with an existing cluster." } - name: "Run the {{redeploy_scheme}} redploy scheme" include_role: From 6aceac8e681488d34493b0db735cc22b269c5435 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Sun, 20 Sep 2020 07:52:23 +0100 Subject: [PATCH 06/58] Increase efficiency of _scheme_rmvm_keepdisk_only and add AWS support --- EXAMPLE/group_vars/_skel/cluster_vars.yml | 8 +- .../group_vars/test_aws_euw1/cluster_vars.yml | 8 +- _dependencies/tasks/main.yml | 3 + config/tasks/disks_auto.yml | 20 +-- create/tasks/aws.yml | 126 ++++++++++++------ .../tasks/main.yml | 2 +- .../tasks/main.yml | 2 +- .../tasks/_add_diskinfo_esxifree.yml | 36 ----- ..._diskinfo_to_cluster_hosts_target__aws.yml | 34 +++++ ...info_to_cluster_hosts_target__esxifree.yml | 37 +++++ .../tasks/by_hosttype.yml | 2 +- .../tasks/by_hosttype_by_host.yml | 49 +++---- .../_scheme_rmvm_keepdisk_only/tasks/main.yml | 14 +- 13 files changed, 207 insertions(+), 134 deletions(-) delete mode 100644 redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_diskinfo_esxifree.yml create mode 100644 redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml create mode 100644 redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml diff --git a/EXAMPLE/group_vars/_skel/cluster_vars.yml b/EXAMPLE/group_vars/_skel/cluster_vars.yml index 92498340..dae9d27d 100644 --- a/EXAMPLE/group_vars/_skel/cluster_vars.yml +++ b/EXAMPLE/group_vars/_skel/cluster_vars.yml @@ -72,12 +72,12 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within # hosttype_vars: # sys: {vms_by_az: {a: 1, b: 1, c: 1}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} # # sysnobeats: {vms_by_az: {a: 1, b: 0, c: 0}, skip_beat_install:true, flavor: t3a.nano, version: "{{sysnobeats_version | default('')}}", auto_volumes: [] -# # sysdisks: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdb", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdc", mountpoint: "/var/log/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 3, ephemeral: False, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdd", mountpoint: "/var/log/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} -# # 
sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdb", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true }]} +# # sysdisks: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/var/log/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 3, ephemeral: False, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/var/log/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} +# # sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true }]} # # hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, auto_volumes: [], nvme: {volumes: [{mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}, {mountpoint: "/var/log/mysvc2", fstype: ext4, volume_size: 2500}]} } # # hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, auto_volumes: [], nvme: {volumes: [{mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}, {mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } } -# # hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} -# # hosthdd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: h1.2xlarge, auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} +# # hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} +# # hosthdd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: h1.2xlarge, auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} # aws_access_key: "" # aws_secret_key: "" # vpc_name: "test{{buildenv}}" diff --git a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml index 03d31890..f5c5c048 100644 --- a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml @@ -78,12 +78,12 @@ cluster_vars: sandbox: hosttype_vars: sys: {vms_by_az: {a: 1, b: 1, c: 1}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} -# sysdisks: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version 
| default('')}}", auto_volumes: [{"device_name": "/dev/sdb", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdc", mountpoint: "/var/log/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 3, ephemeral: False, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdd", mountpoint: "/var/log/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} -# sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdb", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true }]} +# sysdisks: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/var/log/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 3, ephemeral: False, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/var/log/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} +# sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true }]} # hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, auto_volumes: [], nvme: {volumes: [{mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}, {mountpoint: "/var/log/mysvc2", fstype: ext4, volume_size: 2500}]} } # hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, auto_volumes: [], nvme: {volumes: [{mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}, {mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } } -# hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} -# hosthdd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: h1.2xlarge, auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} +# hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} +# hosthdd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: h1.2xlarge, auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, 
"delete_on_termination": true}]} aws_access_key: "" aws_secret_key: "" vpc_name: "test{{buildenv}}" diff --git a/_dependencies/tasks/main.yml b/_dependencies/tasks/main.yml index 8f25ca87..8cf92f20 100644 --- a/_dependencies/tasks/main.yml +++ b/_dependencies/tasks/main.yml @@ -33,5 +33,8 @@ when: "'custom_tagslabels' in cluster_vars" - assert: { that: "'{%- for hosttype in cluster_vars[buildenv].hosttype_vars -%}{% if ('version' in cluster_vars[buildenv].hosttype_vars[hosttype]) and (not cluster_vars[buildenv].hosttype_vars[hosttype].version is regex('^[a-z\\d\\-_]{0,63}$')) %}{{cluster_vars[buildenv].hosttype_vars[hosttype].version}}{% endif %}{%- endfor -%}' == ''", fail_msg: "Please ensure cluster_vars[{{buildenv}}].hosttype_vars[hosttype].version is in the set [a-z\\d\\-_], and <63 characters long." } + - assert: { that: "cluster_vars[buildenv] | json_query(\"hosttype_vars.*.auto_volumes[] | [?contains(`/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde`, device_name)]\") | length == 0", msg: "/dev/sd[b-e] are not allowed as device_name in AWS cluster_vars[buildenv].hosttype_vars. Please start at /dev/sdf." } + when: cluster_vars.type == "aws" + - assert: { that: "(cluster_vars.assign_public_ip == 'yes' and cluster_vars.inventory_ip == 'public') or (cluster_vars.inventory_ip == 'private')", msg: "If inventory_ip=='public', 'assign_public_ip' must be 'yes'" } when: cluster_vars.type == "gcp" or cluster_vars.type == "aws" diff --git a/config/tasks/disks_auto.yml b/config/tasks/disks_auto.yml index ad3715c4..aefd1d5b 100644 --- a/config/tasks/disks_auto.yml +++ b/config/tasks/disks_auto.yml @@ -21,17 +21,17 @@ hostvols: | {% set res = [] -%} {% set tmp_blkvols = lsblk_volumes -%} - {% set inventory_hostname__no_suffix = inventory_hostname | regex_replace('-(?!.*-).*') -%} - {%- for autovol in cluster_hosts_target | to_json | from_json | json_query('[?starts_with(hostname, \'' + inventory_hostname__no_suffix + '\')].auto_volumes[]') -%} - {%- set blkvolloop = namespace(break=false) -%} - {%- for blkvol in tmp_blkvols if not blkvolloop.break -%} - {%- if (autovol.volume_size*1073741824|int) == (blkvol.size_b|int) -%} - {%- set _ = res.extend([ {'device': '/dev/'+blkvol.dev, 'mountpoint': autovol.mountpoint, 'fstype': autovol.fstype, 'perms': autovol.perms | default({})}]) -%} - {%- set _ = tmp_blkvols.remove(blkvol) -%} - {%- set blkvolloop.break = true -%} - {%- endif -%} - {%- endfor -%} + {% set inventory_hostname__no_suffix = inventory_hostname | regex_replace('-(?!.*-).*') -%} + {%- for autovol in cluster_hosts_target | to_json | from_json | json_query('[?starts_with(hostname, \'' + inventory_hostname__no_suffix + '\')].auto_volumes[]') -%} + {%- set blkvolloop = namespace(break=false) -%} + {%- for blkvol in tmp_blkvols if not blkvolloop.break -%} + {%- if (autovol.volume_size*1073741824|int) == (blkvol.size_b|int) -%} + {%- set _ = res.extend([ {'device': '/dev/'+blkvol.dev, 'mountpoint': autovol.mountpoint, 'fstype': autovol.fstype, 'perms': autovol.perms | default({})}]) -%} + {%- set _ = tmp_blkvols.remove(blkvol) -%} + {%- set blkvolloop.break = true -%} + {%- endif -%} {%- endfor -%} + {%- endfor -%} {{ res }} #- name: autodisks | hostvols diff --git a/create/tasks/aws.yml b/create/tasks/aws.yml index e779dc28..8300042a 100644 --- a/create/tasks/aws.yml +++ b/create/tasks/aws.yml @@ -20,6 +20,16 @@ - name: create/aws | Create EC2 VMs asynchronously and wait for completion block: + - name: create/aws | Detach volumes from previous instances (during the _scheme_rmvm_keepdisk_only redeploy, we 
only redeploy one host at a time, and it is already powered off) + ec2_vol: + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{cluster_vars.region}}" + id: "{{item.auto_volume.src.volume_id}}" + instance: None + loop: "{{ cluster_hosts_target_denormalised_by_volume }}" + when: "'src' in item.auto_volume" + - name: create/aws | Create EC2 VMs asynchronously ec2: aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" @@ -35,9 +45,7 @@ wait: yes instance_tags: "{{ _instance_tags | combine(cluster_vars.custom_tagslabels | default({})) }}" termination_protection: "{{cluster_vars[buildenv].termination_protection}}" - volumes: "{{ item.auto_volumes | default([]) }}" - count_tag: - Name: "{{item.hostname}}" + count_tag: { Name: "{{item.hostname}}" } exact_count: 1 vars: _instance_tags: @@ -51,56 +59,82 @@ maintenance_mode: "true" release: "{{ release_version }}" lifecycle_state: "current" - with_items: "{{cluster_hosts_target}}" + loop: "{{ cluster_hosts_target }}" async: 7200 poll: 0 - register: aws_instances + register: r__ec2 - name: create/aws | Wait for aws instance creation to complete - async_status: - jid: "{{ item.ansible_job_id }}" - register: aws_jobs - until: aws_jobs.finished + async_status: { jid: "{{ item.ansible_job_id }}" } + register: r__async_status__ec2 + until: r__async_status__ec2.finished delay: 3 retries: 300 - with_items: "{{aws_instances.results}}" + with_items: "{{r__ec2.results}}" -# - name: create/aws | aws_jobs.results -# debug: msg={{aws_jobs.results}} + - name: create/aws | r__async_status__ec2.results + debug: msg={{r__async_status__ec2.results}} - name: create/aws | Set a fact containing the newly-created hosts set_fact: - cluster_hosts_created: "{{ aws_jobs.results | json_query(\"[?changed==`true`].item.item\") }}" + cluster_hosts_created: "{{ r__async_status__ec2.results | json_query(\"[?changed==`true`].item.item\") }}" - - name: create/aws | Force set maintenance_mode to true (when prometheus_set_unset_maintenance_mode) - ec2_tag: + - name: create/aws | Create new volumes asynchronously (or attach existing if src is present, e.g. 
via the _scheme_rmvm_keepdisk_only scheme) + ec2_vol: aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" region: "{{cluster_vars.region}}" - resource: "{{ item }}" - tags: - maintenance_mode: "true" - with_items: "{{ aws_jobs.results | json_query('[].tagged_instances[0].id')}}" - when: (prometheus_set_unset_maintenance_mode is defined and prometheus_set_unset_maintenance_mode|bool) + instance: "{{ r__async_status__ec2.results | json_query(\"[].tagged_instances[?tags.Name==`\" + item.hostname + \"`].id[] | [0]\") | default(omit) }}" + id: "{{item.auto_volume.src.volume_id | default(omit)}}" + snapshot: "{{item.auto_volume.snapshot | default(omit)}}" + device_name: "{{item.auto_volume.device_name}}" + encrypted: "{{item.auto_volume.encrypted}}" + volume_size: "{%- if 'src' not in item.auto_volume -%}{{item.auto_volume.volume_size}}{%- endif -%}" + volume_type: "{{item.auto_volume.volume_type}}" + delete_on_termination: yes + loop: "{{ cluster_hosts_target_denormalised_by_volume }}" + async: 7200 + poll: 0 + register: r__ec2_vol + + - name: create/aws | Wait for volume creation/ attachment to complete + async_status: { jid: "{{ item.ansible_job_id }}" } + register: r__async_status__ec2_vol + until: r__async_status__ec2_vol.finished + delay: 3 + retries: 300 + with_items: "{{r__ec2_vol.results}}" + vars: + cluster_hosts_target_denormalised_by_volume: | + {% set res = [] -%} + {%- for cht_host in cluster_hosts_target -%} + {%- for autovol in cht_host.auto_volumes -%} + {%- set elem = {} -%} + {%- for cht_host_key in cht_host.keys() -%} + {%- if cht_host_key != 'auto_volumes' -%} + {%- set _ = elem.update({cht_host_key: cht_host[cht_host_key]}) -%} + {%- else -%} + {%- set _ = elem.update({'auto_volume': autovol}) -%} + {%- endif -%} + {%- endfor -%} + {%- set _ = res.append(elem) -%} + {%- endfor -%} + {%- endfor -%} + {{res}} - - name: create/aws | Extract EBS volume data so we can tag the disks - set_fact: - ebsdata: | - {% set res = [] -%} - {%- for host in aws_jobs.results -%} - {%- for devkey in host.tagged_instances[0].block_device_mapping.keys()-%} - {% set _dummy = res.extend([{ - 'hostname': host.tagged_instances[0].tags.Name, - 'ec2_id': host.tagged_instances[0].id, - 'device_name': devkey, - 'volume_id': host.tagged_instances[0].block_device_mapping[devkey].volume_id - }]) -%} - {%- endfor %} - {%- endfor %} - {{ res }} -# - name: create/aws | ebsdata -# debug: msg={{ebsdata}} +- name: create/aws | Tag the EBS volumes + block: + - name: create/aws | Get the ec2_instance_info for EBS tagging + ec2_instance_info: + filters: + "instance-state-name": ["running", "stopped"] + "tag:cluster_name": "{{cluster_name}}" + "tag:lifecycle_state": "current" + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{cluster_vars.region}}" + register: r__ec2_instance_info - name: create/aws | Set the ec2 volume name tag ec2_tag: @@ -108,6 +142,20 @@ aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" region: "{{cluster_vars.region}}" resource: "{{item.volume_id}}" - tags: + tags: "{{ _tags | combine(cluster_vars.custom_tagslabels | default({})) }}" + with_items: "{{_ec2_vols_denormalised_by_device}}" + vars: + _ec2_vols_denormalised_by_device: | + {% set res = [] -%} + {%- for host_instance in r__ec2_instance_info.instances -%} + {%- for block_device in host_instance.block_device_mappings -%} + {% set _ = res.append({'hostname': 
host_instance.tags.Name, 'hosttype': host_instance.tags.hosttype, 'device_name': block_device.device_name, 'volume_id': block_device.ebs.volume_id}) -%} + {%- endfor %} + {%- endfor %} + {{ res }} + _tags: Name: "{{ item.hostname }}--{{item.device_name | regex_replace('^.*\\/(.*)', '\\1')}}" - with_items: "{{ebsdata}}" + inv_node_version: "{{cluster_vars[buildenv].hosttype_vars[item.hosttype].version | default(omit)}}" + inv_node_type: "{{item.hosttype}}" + owner: "{{ lookup('env','USER') | lower }}" + release: "{{ release_version }}" diff --git a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml index 728a5618..29ee5ac6 100644 --- a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml +++ b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml @@ -5,7 +5,7 @@ - assert: { that: "non_current_hosts | length == 0", msg: "ERROR - All VMs must be in the 'current' lifecycle_state. Those not [{{non_current_hosts | join(',')}}]" } vars: non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" - when: canary=="start" or canary=="none" + when: (canary=="start" or canary=="none") and (ignore_lifecycle_check is not defined or ignore_lifecycle_check==false) - name: Redeploy by replacing entire cluster; rollback on fail diff --git a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml index 2a71a918..08f668a9 100644 --- a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml +++ b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml @@ -5,7 +5,7 @@ - assert: { that: "non_current_hosts | length == 0", msg: "ERROR - All VMs must be in the 'current' lifecycle_state. Those not [{{non_current_hosts | join(',')}}]" } vars: non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" - when: canary=="start" or canary=="none" + when: (canary=="start" or canary=="none") and (ignore_lifecycle_check is not defined or ignore_lifecycle_check==false) - name: Redeploy by hosttype; rollback on fail diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_diskinfo_esxifree.yml b/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_diskinfo_esxifree.yml deleted file mode 100644 index 40e7c103..00000000 --- a/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_diskinfo_esxifree.yml +++ /dev/null @@ -1,36 +0,0 @@ ---- - -- name: _get_diskinfo_esxifree | hosts_to_stop - debug: msg="{{hosts_to_stop}}" - -- name: _get_diskinfo_esxifree | vmware_guest_disk_info - vmware_guest_disk_info: - username: "{{ cluster_vars.username }}" - password: "{{ cluster_vars.password }}" - hostname: "{{ cluster_vars.esxi_ip }}" - datacenter: ha-datacenter - validate_certs: no - name: "{{item.name}}" - with_items: "{{hosts_to_stop}}" - register: r__vmware_guest_disk_info - -#- name: _get_diskinfo_esxifree | debug r__vmware_guest_disk_info -# debug: msg={{r__vmware_guest_disk_info}} - -- assert: { that: "r__vmware_guest_disk_info | json_query(\"results[].guest_disk_info.*[] | [?backing_datastore!='\" + cluster_vars.datastore + \"']\") | length == 0", msg: "Move is only possible if disks are on the same datastore." 
} - when: _scheme_rmvm_keepdisk_only__copy_or_move == "move" - -- name: _get_diskinfo_esxifree | augment cluster_host_redeploying's auto_volumes with source disk info - set_fact: - cluster_host_redeploying: | - {% set res = _cluster_host_redeploying_loopvar -%} - {%- for autovol in res.auto_volumes -%} - {%- for host_to_stop_diskinfo_result in r__vmware_guest_disk_info.results -%} - {%- if res.hostname | regex_replace('-(?!.*-).*') == host_to_stop_diskinfo_result.item.name | regex_replace('-(?!.*-).*') -%} - {%- for host_to_stop_diskinfo in host_to_stop_diskinfo_result.guest_disk_info | to_json | from_json | json_query('*| [?unit_number!=`0` && backing_type==\'FlatVer2\' && contains(backing_filename, \'--' + autovol.volname + '.vmdk\')]') -%} - {%- set _ = autovol.update({'volume_size': (host_to_stop_diskinfo.capacity_in_bytes/1073741824)|int, 'src': {'backing_filename': host_to_stop_diskinfo.backing_filename, 'copy_or_move': _scheme_rmvm_keepdisk_only__copy_or_move }}) -%} - {%- endfor -%} - {%- endif -%} - {%- endfor -%} - {%- endfor -%} - {{res}} diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml b/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml new file mode 100644 index 00000000..fda28843 --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml @@ -0,0 +1,34 @@ +--- + +- name: _get_diskinfo_aws | ec2_instance_info + ec2_instance_info: + filters: + "instance-state-name": ["running", "stopped"] + "tag:cluster_name": "{{cluster_name}}" + "tag:lifecycle_state": "retiring" + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{cluster_vars.region}}" + register: r__ec2_instance_info + +#- name: _get_diskinfo_aws | r__ec2_instance_info +# debug: msg={{r__ec2_instance_info}} + +- name: _get_diskinfo_aws | augment cluster_hosts_target auto_volumes with source disk info + set_fact: + cluster_hosts_target: | + {%- for cht_host in cluster_hosts_target -%} + {%- for cht_autovol in cht_host.auto_volumes -%} + {%- for chs_host_info_result in r__ec2_instance_info.instances -%} + {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == chs_host_info_result.tags.Name | regex_replace('-(?!.*-).*') -%} + {%- for chs_host_diskinfo in chs_host_info_result.block_device_mappings | selectattr('device_name', '==', cht_autovol.device_name) -%} + {%- set _ = cht_autovol.update({'src': {'instance_id': chs_host_info_result.instance_id, 'volume_id': chs_host_diskinfo.ebs.volume_id }}) -%} + {%- endfor -%} + {%- endif -%} + {%- endfor -%} + {%- endfor -%} + {%- endfor -%} + {{cluster_hosts_target}} + +- name: _get_diskinfo_aws | cluster_hosts_target + debug: msg={{cluster_hosts_target}} diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml b/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml new file mode 100644 index 00000000..ee89196e --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml @@ -0,0 +1,37 @@ +--- + +- name: _get_diskinfo_esxifree | vmware_guest_disk_info + vmware_guest_disk_info: + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + hostname: "{{ cluster_vars.esxi_ip }}" + datacenter: ha-datacenter + validate_certs: no + name: "{{item.name}}" + with_items: 
"{{cluster_hosts_state}}" + register: r__vmware_guest_disk_info + +#- name: _get_diskinfo_esxifree | r__vmware_guest_disk_info +# debug: msg={{r__vmware_guest_disk_info}} + +- assert: { that: "r__vmware_guest_disk_info | json_query(\"results[].guest_disk_info.*[] | [?backing_datastore!='\" + cluster_vars.datastore + \"']\") | length == 0", msg: "Move is only possible if disks are on the same datastore." } + when: _scheme_rmvm_keepdisk_only__copy_or_move == "move" + +- name: _get_diskinfo_esxifree | augment cluster_hosts_target auto_volumes with source disk info + set_fact: + cluster_hosts_target: | + {%- for cht_host in cluster_hosts_target -%} + {%- for cht_autovol in cht_host.auto_volumes -%} + {%- for chs_host_info_result in r__vmware_guest_disk_info.results -%} + {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == chs_host_info_result.item.name | regex_replace('-(?!.*-).*') -%} + {%- for chs_host_diskinfo in chs_host_info_result.guest_disk_info | to_json | from_json | json_query('*| [?unit_number!=`0` && backing_type==\'FlatVer2\' && contains(backing_filename, \'--' + cht_autovol.volname + '.vmdk\')]') -%} + {%- set _ = cht_autovol.update({'volume_size': (chs_host_diskinfo.capacity_in_bytes/1073741824)|int, 'src': {'backing_filename': chs_host_diskinfo.backing_filename, 'copy_or_move': _scheme_rmvm_keepdisk_only__copy_or_move }}) -%} + {%- endfor -%} + {%- endif -%} + {%- endfor -%} + {%- endfor -%} + {%- endfor -%} + {{cluster_hosts_target}} + +#- name: _get_diskinfo_esxifree | cluster_hosts_target +# debug: msg={{cluster_hosts_target}} diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype.yml b/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype.yml index 1bebc776..6cc2b25d 100644 --- a/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype.yml +++ b/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype.yml @@ -20,4 +20,4 @@ include_tasks: by_hosttype_by_host.yml with_items: "{{ hosts_to_redeploy }}" loop_control: - loop_var: _cluster_host_redeploying_loopvar + loop_var: host_to_redeploy diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype_by_host.yml b/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype_by_host.yml index 14276ef3..12d27ac5 100644 --- a/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype_by_host.yml +++ b/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype_by_host.yml @@ -1,51 +1,34 @@ --- -- debug: msg="Attempting to redeploy {{_cluster_host_redeploying_loopvar.hostname}}" +- debug: msg="by_hosttype_by_host | Attempting to redeploy {{host_to_redeploy.hostname}}" -- name: by_hosttype_by_host | stop/ remove previous instance - block: - - name: by_hosttype_by_host | run predeleterole role - include_role: - name: "{{predeleterole}}" - when: predeleterole is defined and predeleterole != "" - - - name: by_hosttype_by_host | Power off old VM - include_role: - name: clusterverse/redeploy/__common - tasks_from: poweroff_vms.yml - - - name: by_hosttype_by_host | re-acquire the dynamic inventory - include_role: - name: clusterverse/dynamic_inventory - - - name: by_hosttype_by_host | re-acquire cluster_hosts_target and cluster_hosts_state - import_role: - name: clusterverse/cluster_hosts +- name: by_hosttype_by_host | run predeleterole role + include_role: + name: "{{predeleterole}}" + when: predeleterole is defined and predeleterole != "" - - name: by_hosttype_by_host | create cluster_host_redeploying with the disk info from hosts_to_stop - include_role: - name: "{{role_path}}" - tasks_from: "_add_diskinfo_{{cluster_vars.type}}.yml" +- 
name: by_hosttype_by_host | Power off old VM + include_role: + name: clusterverse/redeploy/__common + tasks_from: poweroff_vms.yml vars: - _root_cluster_host_redeploying: "{{_cluster_host_redeploying_loopvar.hostname | regex_replace('-(?!.*-).*')}}" #Remove the cluster_suffix from the hostname - hosts_to_stop: "{{ cluster_hosts_state | to_json | from_json | json_query(\"[?tagslabels.lifecycle_state=='retiring' && starts_with(name, '\" + _root_cluster_host_redeploying + \"')]\") }}" - -- name: by_hosttype_by_host | cluster_host_redeploying - debug: msg={{cluster_host_redeploying}} + _host_to_redeploy_nosuffix: "{{host_to_redeploy.hostname | regex_replace('-(?!.*-).*')}}" #Remove the cluster_suffix from the hostname + hosts_to_stop: "{{ cluster_hosts_state | to_json | from_json | json_query(\"[?tagslabels.lifecycle_state=='retiring' && starts_with(name, '\" + _host_to_redeploy_nosuffix + \"')]\") }}" -- name: "by_hosttype_by_host | Run {{mainclusteryml}} to add {{cluster_host_redeploying.hostname}} to cluster" - shell: "{{ (argv | join(' ')) | regex_replace('redeploy.yml', mainclusteryml) }} -e cluster_suffix={{cluster_suffix}} -e '{'cluster_hosts_target': [{{cluster_host_redeploying | to_json}}]}'" +- name: "by_hosttype_by_host | Run {{mainclusteryml}} to add {{host_to_redeploy.hostname}} to cluster" + shell: "{{ (argv | join(' ')) | regex_replace('redeploy.yml', mainclusteryml) }} -e cluster_suffix={{cluster_suffix}} -e '{'cluster_hosts_target': [{{host_to_redeploy | to_json}}]}'" register: r__mainclusteryml no_log: True ignore_errors: yes - debug: msg="{{[r__mainclusteryml.stdout_lines] + [r__mainclusteryml.stderr_lines]}}" failed_when: r__mainclusteryml is failed -# when: r__mainclusteryml is failed or (debug_nested_log_output is defined and debug_nested_log_output|bool) + when: r__mainclusteryml is failed or (debug_nested_log_output is defined and debug_nested_log_output|bool) - name: by_hosttype_by_host | re-acquire the dynamic inventory include_role: name: clusterverse/dynamic_inventory -- name: by_hosttype_by_host | re-acquire cluster_hosts_target and cluster_hosts_state +- name: by_hosttype_by_host | re-acquire cluster_hosts_state (NOT cluster_hosts_target, as we are augmenting this in _add_src_diskinfo_to_cluster_hosts_target__ (also, it is not affected by change of state)) import_role: name: clusterverse/cluster_hosts + tasks_from: get_cluster_hosts_state.yml diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml b/redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml index 67aeba86..61680183 100644 --- a/redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml +++ b/redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml @@ -3,17 +3,18 @@ - name: Preflight check block: - assert: { that: "_scheme_rmvm_keepdisk_only__copy_or_move is defined and _scheme_rmvm_keepdisk_only__copy_or_move in ['copy', 'move']", msg: "ERROR - _scheme_rmvm_keepdisk_only__copy_or_move must be defined and set to either 'copy' or 'move'" } + when: cluster_vars.type == "esxifree" - assert: { that: "non_current_hosts | length == 0", msg: "ERROR - All VMs must be in the 'current' lifecycle_state. 
Those not [{{non_current_hosts | join(',')}}]" } vars: non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" - when: canary=="start" or canary=="none" + when: (canary=="start" or canary=="none") and (ignore_lifecycle_check is not defined or ignore_lifecycle_check==false) - assert: - that: "{{chs_hosts | difference(chf_hosts) | length==0}}" - msg: "Cannot use this scheme to redeploy to smaller cluster; [{{ chs_hosts | join(',') }}] > [{{ chf_hosts | join(',') }}]" + that: "{{chs_hosts | difference(cht_hosts) | length==0}}" + msg: "Cannot use this scheme to redeploy to smaller cluster; [{{ chs_hosts | join(',') }}] > [{{ cht_hosts | join(',') }}]" vars: - chf_hosts: "{{ cluster_hosts_target | json_query(\"[].hostname\") | map('regex_replace', '-(?!.*-).*') | list }}" + cht_hosts: "{{ cluster_hosts_target | json_query(\"[].hostname\") | map('regex_replace', '-(?!.*-).*') | list }}" chs_hosts: "{{ cluster_hosts_state | json_query(\"[].name\") | map('regex_replace', '-(?!.*-).*') | list }}" - name: Redeploy setup @@ -36,6 +37,9 @@ when: cluster_suffix is defined when: (canary=="start" or canary=="none") +- name: Add the disk info from previous instances to cluster_hosts_target + include_tasks: "_add_src_diskinfo_to_cluster_hosts_target__{{cluster_vars.type}}.yml" + - name: Run redeploy per hosttype. Create one at a time, then stop previous. include_tasks: by_hosttype.yml with_items: "{{ myhosttypes_array }}" @@ -58,7 +62,7 @@ - include_role: name: clusterverse/clean - tasks_from: "clean_vms_{{cluster_vars.type}}.yml" + tasks_from: clean_vms.yml when: (hosts_to_clean | length) - debug: From 6215a0f117db814b53441e040ec3eb05733164b8 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Wed, 23 Sep 2020 13:09:39 +0100 Subject: [PATCH 07/58] Add rollback to _scheme_rmvm_keepdisk_only and rename _scheme_rmvm_keepdisk_rollback --- .../group_vars/test_aws_euw1/cluster_vars.yml | 4 +- .../group_vars/test_gcp_euw1/cluster_vars.yml | 2 +- _dependencies/callback_plugins/cli_facts.py | 58 ------- _dependencies/vars_plugins/cli_facts.py | 2 +- create/tasks/aws.yml | 8 +- .../tasks/set_lifecycle_state_label.yml | 1 + .../_scheme_rmvm_keepdisk_only/tasks/main.yml | 73 --------- ..._diskinfo_to_cluster_hosts_target__aws.yml | 3 +- ...info_to_cluster_hosts_target__esxifree.yml | 0 .../tasks/by_hosttype.yml | 0 .../tasks/by_hosttype_by_host.yml | 14 +- .../tasks/main.yml | 142 ++++++++++++++++++ 12 files changed, 165 insertions(+), 142 deletions(-) delete mode 100644 _dependencies/callback_plugins/cli_facts.py delete mode 100644 redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml rename redeploy/{_scheme_rmvm_keepdisk_only => _scheme_rmvm_keepdisk_rollback}/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml (95%) rename redeploy/{_scheme_rmvm_keepdisk_only => _scheme_rmvm_keepdisk_rollback}/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml (100%) rename redeploy/{_scheme_rmvm_keepdisk_only => _scheme_rmvm_keepdisk_rollback}/tasks/by_hosttype.yml (100%) rename redeploy/{_scheme_rmvm_keepdisk_only => _scheme_rmvm_keepdisk_rollback}/tasks/by_hosttype_by_host.yml (67%) create mode 100644 redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml diff --git a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml index f5c5c048..98f4f848 100644 --- a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml @@ -1,11 +1,11 @@ --- -redeploy_schemes_supported: 
['_scheme_addallnew_rmdisk_rollback', '_scheme_addnewvm_rmdisk_rollback', '_scheme_rmvm_rmdisk_only'] +redeploy_schemes_supported: ['_scheme_addallnew_rmdisk_rollback', '_scheme_addnewvm_rmdisk_rollback', '_scheme_rmvm_rmdisk_only', '_scheme_rmvm_keepdisk_rollback'] #redeploy_scheme: _scheme_addallnew_rmdisk_rollback #redeploy_scheme: _scheme_addnewvm_rmdisk_rollback #redeploy_scheme: _scheme_rmvm_rmdisk_only -#redeploy_scheme: _scheme_rmvm_keepdisk_only +#redeploy_scheme: _scheme_rmvm_keepdisk_rollback app_name: "test" # The name of the application cluster (e.g. 'couchbase', 'nginx'); becomes part of cluster_name. app_class: "test" # The class of application (e.g. 'database', 'webserver'); becomes part of the fqdn diff --git a/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml index 9e704425..fc82c372 100644 --- a/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml @@ -9,7 +9,7 @@ redeploy_schemes_supported: ['_scheme_addallnew_rmdisk_rollback', '_scheme_addne redeploy_scheme: _scheme_addallnew_rmdisk_rollback #redeploy_scheme: _scheme_addnewvm_rmdisk_rollback #redeploy_scheme: _scheme_rmvm_rmdisk_only -#redeploy_scheme: _scheme_rmvm_keepdisk_only +#redeploy_scheme: _scheme_rmvm_keepdisk_rollback app_name: "test" # The name of the application cluster (e.g. 'couchbase', 'nginx'); becomes part of cluster_name. app_class: "test" # The class of application (e.g. 'database', 'webserver'); becomes part of the fqdn diff --git a/_dependencies/callback_plugins/cli_facts.py b/_dependencies/callback_plugins/cli_facts.py deleted file mode 100644 index 6620ca80..00000000 --- a/_dependencies/callback_plugins/cli_facts.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/python -# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) - -from __future__ import absolute_import, division, print_function - -__metaclass__ = type - -ANSIBLE_METADATA = {'metadata_version': '1.2', - 'status': ['preview'], - 'supported_by': 'community'} - -DOCUMENTATION = ''' ---- -module: cli_facts -short_description: Expose the system ARGV and CLI arguments as facts in plays. -version_added: "2.8" -author: "Dougal Seeley" -description: - - Expose the system ARGV and CLI arguments as facts in plays. Two new facts are added: argv and cliargs. -options: -requirements: -''' - -from ansible.plugins.callback import CallbackBase -from ansible.context import CLIARGS -from ansible.cli import CLI - -import sys - -HAS_MODULES = True - - -class CallbackModule(CallbackBase): - CALLBACK_VERSION = 2.0 - CALLBACK_TYPE = 'aggregate' - CALLBACK_NAME = 'cli_facts' - CALLBACK_NEEDS_WHITELIST = False - - def __init__(self, *args, **kwargs): - super(CallbackModule, self).__init__(*args, **kwargs) - self._cliargs = CLIARGS - self._argv = sys.argv - - def set_options(self, task_keys=None, var_options=None, direct=None): - super(CallbackModule, self).set_options(task_keys=task_keys, var_options=var_options, direct=direct) - - def v2_playbook_on_play_start(self, play): - variable_manager = play.get_variable_manager() - - # We cannot put 'localhost' in get_hosts(pattern=['all', 'localhost']) call, because of PR 58400, described below. 
- hosts = variable_manager._inventory.get_hosts(pattern=['all'], ignore_restrictions=True) - if variable_manager._inventory.localhost: hosts.append(variable_manager._inventory.localhost) - for host in hosts: - # Ansible 2.9 (https://github.com/ansible/ansible/pull/58400) changed the 'host' type in ansible/vars/manager.py::set_host_variable() from type to type string. - if CLI.version_info()['major'] >= 2 and CLI.version_info()['minor'] >= 9: - host = str(host) - variable_manager.set_host_variable(host, "cliargs", dict(self._cliargs)) - variable_manager.set_host_variable(host, "argv", self._argv) diff --git a/_dependencies/vars_plugins/cli_facts.py b/_dependencies/vars_plugins/cli_facts.py index 5cd07ccf..ea113fa7 100644 --- a/_dependencies/vars_plugins/cli_facts.py +++ b/_dependencies/vars_plugins/cli_facts.py @@ -7,7 +7,7 @@ DOCUMENTATION = ''' --- -cars: cli_facts +vars: argv, cliargs short_description: Expose the system ARGV and CLI arguments as facts in plays. version_added: "2.8" author: "Dougal Seeley" diff --git a/create/tasks/aws.yml b/create/tasks/aws.yml index 8300042a..af0ec685 100644 --- a/create/tasks/aws.yml +++ b/create/tasks/aws.yml @@ -20,7 +20,7 @@ - name: create/aws | Create EC2 VMs asynchronously and wait for completion block: - - name: create/aws | Detach volumes from previous instances (during the _scheme_rmvm_keepdisk_only redeploy, we only redeploy one host at a time, and it is already powered off) + - name: create/aws | Detach volumes from previous instances (during the _scheme_rmvm_keepdisk_rollback redeploy, we only redeploy one host at a time, and it is already powered off) ec2_vol: aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" @@ -72,14 +72,14 @@ retries: 300 with_items: "{{r__ec2.results}}" - - name: create/aws | r__async_status__ec2.results - debug: msg={{r__async_status__ec2.results}} +# - name: create/aws | r__async_status__ec2.results +# debug: msg={{r__async_status__ec2.results}} - name: create/aws | Set a fact containing the newly-created hosts set_fact: cluster_hosts_created: "{{ r__async_status__ec2.results | json_query(\"[?changed==`true`].item.item\") }}" - - name: create/aws | Create new volumes asynchronously (or attach existing if src is present, e.g. via the _scheme_rmvm_keepdisk_only scheme) + - name: create/aws | Create new volumes asynchronously (or attach existing if src is present, e.g. 
via the _scheme_rmvm_keepdisk_rollback scheme) ec2_vol: aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" diff --git a/redeploy/__common/tasks/set_lifecycle_state_label.yml b/redeploy/__common/tasks/set_lifecycle_state_label.yml index 93c582be..093a50be 100644 --- a/redeploy/__common/tasks/set_lifecycle_state_label.yml +++ b/redeploy/__common/tasks/set_lifecycle_state_label.yml @@ -1,4 +1,5 @@ --- + - name: set_lifecycle_state_label | hosts_to_relabel debug: msg="{{hosts_to_relabel}}" diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml b/redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml deleted file mode 100644 index 61680183..00000000 --- a/redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml +++ /dev/null @@ -1,73 +0,0 @@ ---- - -- name: Preflight check - block: - - assert: { that: "_scheme_rmvm_keepdisk_only__copy_or_move is defined and _scheme_rmvm_keepdisk_only__copy_or_move in ['copy', 'move']", msg: "ERROR - _scheme_rmvm_keepdisk_only__copy_or_move must be defined and set to either 'copy' or 'move'" } - when: cluster_vars.type == "esxifree" - - - assert: { that: "non_current_hosts | length == 0", msg: "ERROR - All VMs must be in the 'current' lifecycle_state. Those not [{{non_current_hosts | join(',')}}]" } - vars: - non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" - when: (canary=="start" or canary=="none") and (ignore_lifecycle_check is not defined or ignore_lifecycle_check==false) - - - assert: - that: "{{chs_hosts | difference(cht_hosts) | length==0}}" - msg: "Cannot use this scheme to redeploy to smaller cluster; [{{ chs_hosts | join(',') }}] > [{{ cht_hosts | join(',') }}]" - vars: - cht_hosts: "{{ cluster_hosts_target | json_query(\"[].hostname\") | map('regex_replace', '-(?!.*-).*') | list }}" - chs_hosts: "{{ cluster_hosts_state | json_query(\"[].name\") | map('regex_replace', '-(?!.*-).*') | list }}" - -- name: Redeploy setup - block: - - name: Change lifecycle_state label from 'current' to 'retiring' - include_role: - name: clusterverse/redeploy/__common - tasks_from: set_lifecycle_state_label.yml - vars: - hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current']\") }}" - new_state: "retiring" - when: ('retiring' not in (cluster_hosts_state | map(attribute='tagslabels.lifecycle_state'))) - - - name: re-acquire cluster_hosts_target and cluster_hosts_state - include_role: - name: clusterverse/cluster_hosts - public: yes - - - assert: { that: "cluster_hosts_state | json_query(\"[?tagslabels.cluster_suffix == '\"+ cluster_suffix +\"']\") | length == 0", msg: "Please ensure cluster_suffix ({{cluster_suffix}}) is not already set on the cluster" } - when: cluster_suffix is defined - when: (canary=="start" or canary=="none") - -- name: Add the disk info from previous instances to cluster_hosts_target - include_tasks: "_add_src_diskinfo_to_cluster_hosts_target__{{cluster_vars.type}}.yml" - -- name: Run redeploy per hosttype. Create one at a time, then stop previous. 
- include_tasks: by_hosttype.yml - with_items: "{{ myhosttypes_array }}" - loop_control: - loop_var: hosttype - vars: - cluster_hosts_target_by_hosttype: "{{cluster_hosts_target | dict_agg('hosttype')}}" - myhosttypes_array: "{%- if myhosttypes is defined -%} {{ myhosttypes.split(',') }} {%- else -%} {{ cluster_hosts_target_by_hosttype.keys() | list }} {%- endif -%}" - when: canary!="tidy" - - -- name: "Tidy up powered-down, non-current instances. NOTE: Must do clean_dns first, because both clean_dns and clean_vms have the cluster_hosts role as a dependency, which when run after clean_vms, will be empty." - block: - - assert: { that: "'current' in (cluster_hosts_state | map(attribute='tagslabels.lifecycle_state'))", msg: "ERROR - Cannot tidy when there are no machines in the 'current' lifecycle_state. Please use '-e clean=_all_'." } - - - include_role: - name: clusterverse/clean - tasks_from: clean_dns.yml - when: (hosts_to_clean | length) and (cluster_vars.dns_server is defined and cluster_vars.dns_server != "") and (cluster_vars.dns_user_domain is defined and cluster_vars.dns_user_domain != "") - - - include_role: - name: clusterverse/clean - tasks_from: clean_vms.yml - when: (hosts_to_clean | length) - - - debug: - msg: "tidy | No hosts to tidy. Only powered-down, non-current machines with be tidied; to clean other machines, please use the '-e clean=' extra variable." - when: hosts_to_clean | length == 0 - vars: - hosts_to_clean: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current' && !(contains('RUNNING,running', instance_state))]\") }}" - when: canary=="tidy" or ((canary=="none" or canary=="finish") and canary_tidy_on_success is defined and canary_tidy_on_success|bool) diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml similarity index 95% rename from redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml rename to redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml index fda28843..20bde44d 100644 --- a/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml @@ -5,7 +5,6 @@ filters: "instance-state-name": ["running", "stopped"] "tag:cluster_name": "{{cluster_name}}" - "tag:lifecycle_state": "retiring" aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" region: "{{cluster_vars.region}}" @@ -19,7 +18,7 @@ cluster_hosts_target: | {%- for cht_host in cluster_hosts_target -%} {%- for cht_autovol in cht_host.auto_volumes -%} - {%- for chs_host_info_result in r__ec2_instance_info.instances -%} + {%- for chs_host_info_result in r__ec2_instance_info.instances | selectattr('tags.lifecycle_state', '!=', 'current') -%} {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == chs_host_info_result.tags.Name | regex_replace('-(?!.*-).*') -%} {%- for chs_host_diskinfo in chs_host_info_result.block_device_mappings | selectattr('device_name', '==', cht_autovol.device_name) -%} {%- set _ = cht_autovol.update({'src': {'instance_id': chs_host_info_result.instance_id, 'volume_id': chs_host_diskinfo.ebs.volume_id }}) -%} diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml 
b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml similarity index 100% rename from redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml rename to redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype.yml similarity index 100% rename from redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype.yml rename to redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype.yml diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype_by_host.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml similarity index 67% rename from redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype_by_host.yml rename to redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml index 12d27ac5..bcc20497 100644 --- a/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype_by_host.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml @@ -13,7 +13,7 @@ tasks_from: poweroff_vms.yml vars: _host_to_redeploy_nosuffix: "{{host_to_redeploy.hostname | regex_replace('-(?!.*-).*')}}" #Remove the cluster_suffix from the hostname - hosts_to_stop: "{{ cluster_hosts_state | to_json | from_json | json_query(\"[?tagslabels.lifecycle_state=='retiring' && starts_with(name, '\" + _host_to_redeploy_nosuffix + \"')]\") }}" + hosts_to_stop: "{{ cluster_hosts_state | to_json | from_json | json_query(\"[?tagslabels.lifecycle_state!='current' && starts_with(name, '\" + _host_to_redeploy_nosuffix + \"')]\") }}" - name: "by_hosttype_by_host | Run {{mainclusteryml}} to add {{host_to_redeploy.hostname}} to cluster" shell: "{{ (argv | join(' ')) | regex_replace('redeploy.yml', mainclusteryml) }} -e cluster_suffix={{cluster_suffix}} -e '{'cluster_hosts_target': [{{host_to_redeploy | to_json}}]}'" @@ -24,6 +24,18 @@ failed_when: r__mainclusteryml is failed when: r__mainclusteryml is failed or (debug_nested_log_output is defined and debug_nested_log_output|bool) +- name: by_hosttype_by_host | re-acquire cluster_hosts_state (NOT cluster_hosts_target, as we are augmenting this in _add_src_diskinfo_to_cluster_hosts_target__ (also, it is not affected by change of state)) + import_role: + name: clusterverse/cluster_hosts + tasks_from: get_cluster_hosts_state.yml + +- name: by_hosttype_by_host | Power on new VM (not needed for normal redeploy, but for rescue case) + include_role: + name: clusterverse/redeploy/__common + tasks_from: poweron_vms.yml + vars: + hosts_to_start: "{{ cluster_hosts_state | selectattr('name', '==', host_to_redeploy.hostname) | list }}" + - name: by_hosttype_by_host | re-acquire the dynamic inventory include_role: name: clusterverse/dynamic_inventory diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml new file mode 100644 index 00000000..328599e7 --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml @@ -0,0 +1,142 @@ +--- + +- name: Preflight check + block: + - block: + - name: Preflight check | ec2_instance_info + ec2_instance_info: + filters: { "instance-state-name": [ "running", "stopped" ], "tag:cluster_name": "{{cluster_name}}", "tag:lifecycle_state": "current" } + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: 
"{{cluster_vars.region}}" + register: r__ec2_instance_info + + - assert: { that: "_invalid_disks | length == 0", msg: "EBS disks with a device_name of /dev/sd[b-e] cannot be reattached to a new instance (an AWS limitation) [found on: {{ _invalid_disks | join(',')}}]. To replace these, you must use a redeploy scheme that copies the disks." } + vars: { _invalid_disks: "{{ r__ec2_instance_info.instances | json_query(\"[?block_device_mappings[?contains(`/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde`, device_name)]].tags.Name\") }}" } + when: cluster_vars.type == "aws" + + - assert: { that: "_scheme_rmvm_keepdisk_only__copy_or_move is defined and _scheme_rmvm_keepdisk_only__copy_or_move in ['copy', 'move']", msg: "ERROR - _scheme_rmvm_keepdisk_only__copy_or_move must be defined and set to either 'copy' or 'move'" } + when: cluster_vars.type == "esxifree" + + - assert: { that: "non_current_hosts | length == 0", msg: "ERROR - All VMs must be in the 'current' lifecycle_state. Those not [{{non_current_hosts | join(',')}}]" } + vars: + non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" + when: (canary=="start" or canary=="none") and (ignore_lifecycle_check is not defined or ignore_lifecycle_check==false) + + - assert: + that: "{{chs_hosts | difference(cht_hosts) | length==0}}" + msg: "Cannot use this scheme to redeploy to smaller cluster; [{{ chs_hosts | join(',') }}] > [{{ cht_hosts | join(',') }}]" + vars: + cht_hosts: "{{ cluster_hosts_target | json_query(\"[].hostname\") | map('regex_replace', '-(?!.*-).*') | list }}" + chs_hosts: "{{ cluster_hosts_state | json_query(\"[].name\") | map('regex_replace', '-(?!.*-).*') | list }}" + +- name: Redeploy by hosttype; rollback on fail + block: + - name: Redeploy setup + block: + - name: Change lifecycle_state label from 'current' to 'retiring' + include_role: + name: clusterverse/redeploy/__common + tasks_from: set_lifecycle_state_label.yml + vars: + hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current']\") }}" + new_state: "retiring" + when: ('retiring' not in (cluster_hosts_state | map(attribute='tagslabels.lifecycle_state'))) + + - name: re-acquire cluster_hosts_target and cluster_hosts_state + include_role: + name: clusterverse/cluster_hosts + public: yes + + - assert: { that: "cluster_hosts_state | json_query(\"[?tagslabels.cluster_suffix == '\"+ cluster_suffix +\"']\") | length == 0", msg: "Please ensure cluster_suffix ({{cluster_suffix}}) is not already set on the cluster" } + when: cluster_suffix is defined + when: (canary=="start" or canary=="none") + + - name: Add the disk info from previous instances to cluster_hosts_target + include_tasks: "_add_src_diskinfo_to_cluster_hosts_target__{{cluster_vars.type}}.yml" + + - name: Run redeploy per hosttype. Create one at a time, then stop previous. + include_tasks: by_hosttype.yml + with_items: "{{ myhosttypes_array }}" + loop_control: + loop_var: hosttype + vars: + cluster_hosts_target_by_hosttype: "{{cluster_hosts_target | dict_agg('hosttype')}}" + myhosttypes_array: "{%- if myhosttypes is defined -%} {{ myhosttypes.split(',') }} {%- else -%} {{ cluster_hosts_target_by_hosttype.keys() | list }} {%- endif -%}" + + - fail: + when: testfail is defined and testfail == "fail_1" + + - name: re-acquire cluster_hosts_target and cluster_hosts_state (For the '-e canary=tidy' option. 
This can't be run in the tidy block below because that block depends on this info being correct) + import_role: + name: clusterverse/cluster_hosts + when: (canary_tidy_on_success is defined and canary_tidy_on_success|bool) + + rescue: + - debug: msg="Rescuing" + + - name: rescue | re-acquire cluster_hosts_target and cluster_hosts_state + import_role: + name: clusterverse/cluster_hosts + + - name: rescue | Change lifecycle_state label from 'current' to 'redeployfail' + include_role: + name: clusterverse/redeploy/__common + tasks_from: set_lifecycle_state_label.yml + vars: + hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current']\") }}" + new_state: "redeployfail" + + - name: rescue | Change lifecycle_state label from 'retiring' to 'current' + include_role: + name: clusterverse/redeploy/__common + tasks_from: set_lifecycle_state_label.yml + vars: + hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}" + new_state: "current" + + - name: rescue | re-acquire cluster_hosts_target and cluster_hosts_state + import_role: + name: clusterverse/cluster_hosts + + - name: rescue | Add the disk info from previous instances to cluster_hosts_target + include_tasks: "_add_src_diskinfo_to_cluster_hosts_target__{{cluster_vars.type}}.yml" + + - name: rescue | explicitly specify only the relevant cluster.yml roles to run for rescuing + set_fact: + argv: "{{argv + ['--tags'] + ['clusterverse_create,clusterverse_dynamic_inventory,clusterverse_readiness'] }}" + + - name: rescue | Run redeploy per hosttype. Create one at a time, then stop previous. + include_tasks: by_hosttype.yml + with_items: "{{ myhosttypes_array }}" + loop_control: + loop_var: hosttype + vars: + cluster_hosts_target_by_hosttype: "{{cluster_hosts_target | dict_agg('hosttype')}}" + myhosttypes_array: "{%- if myhosttypes is defined -%} {{ myhosttypes.split(',') }} {%- else -%} {{ cluster_hosts_target_by_hosttype.keys() | list }} {%- endif -%}" + + - name: rescue | end_play to prevent tidying of pre-rescued VMs + meta: end_play + when: canary!="tidy" + + +- name: "Tidy up powered-down, non-current instances. NOTE: Must do clean_dns first, because both clean_dns and clean_vms have the cluster_hosts role as a dependency, which when run after clean_vms, will be empty." + block: + - assert: { that: "'current' in (cluster_hosts_state | map(attribute='tagslabels.lifecycle_state'))", msg: "ERROR - Cannot tidy when there are no machines in the 'current' lifecycle_state. Please use '-e clean=_all_'." } + + - include_role: + name: clusterverse/clean + tasks_from: clean_dns.yml + when: (hosts_to_clean | length) and (cluster_vars.dns_server is defined and cluster_vars.dns_server != "") and (cluster_vars.dns_user_domain is defined and cluster_vars.dns_user_domain != "") + + - include_role: + name: clusterverse/clean + tasks_from: clean_vms.yml + when: (hosts_to_clean | length) + + - debug: + msg: "tidy | No hosts to tidy. Only powered-down, non-current machines with be tidied; to clean other machines, please use the '-e clean=' extra variable." 
+ when: hosts_to_clean | length == 0 + vars: + hosts_to_clean: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current' && !(contains('RUNNING,running', instance_state))]\") }}" + when: canary=="tidy" or ((canary=="none" or canary=="finish") and canary_tidy_on_success is defined and canary_tidy_on_success|bool) From dba5dfda5832c7b8c007b5ce42b0a22ea8262209 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Mon, 28 Sep 2020 13:00:26 +0100 Subject: [PATCH 08/58] Update to support rollback --- README.md | 13 +- _dependencies/library/esxifree_guest.py | 199 ++++++++++-------- create/tasks/aws.yml | 17 -- create/tasks/esxifree.yml | 4 +- create/tasks/main.yml | 18 ++ redeploy/__common/tasks/poweroff_vms.yml | 3 +- redeploy/__common/tasks/poweron_vms.yml | 5 +- .../tasks/set_lifecycle_state_label.yml | 2 +- 8 files changed, 150 insertions(+), 111 deletions(-) diff --git a/README.md b/README.md index f6a00e5f..33b6c03e 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,7 @@ The role is designed to run in two modes: + For each node in the cluster: + Run `predeleterole` + Delete the node - + Run the main cluster.yml, which forces the missing node to be redeployed. Run with the same parameters as for the main playbook. + + Run the main cluster.yml (with the same parameters as for the main playbook), which forces the missing node to be redeployed (the `cluster_suffix` remains the same). + If the process fails at any point: + No further VMs will be deleted or rebuilt - the playbook stops. + **_scheme_addnewvm_rmdisk_rollback** @@ -130,3 +130,14 @@ The role is designed to run in two modes: + The old VMs are stopped. + If the process fails for any reason, the old VMs are reinstated, and the new VMs stopped (rollback) + To delete the old VMs, either set '-e canary_tidy_on_success=true', or call redeploy.yml with '-e canary=tidy' + + **_scheme_rmvm_keepdisk_rollback** + + _Cluster topology must remain identical_ + + **It assumes a resilient deployment (it can tolerate one node being removed from the cluster).** + + For each node in the cluster: + + Run `predeleterole` + + Stop the node + + Detach the disks from the old node + + Run the main cluster.yml to create a new node + + Attach disks to new node + + If the process fails for any reason, the old VMs are reinstated (and the disks reattached to the old nodes), and the new VMs stopped (rollback) + + To delete the old VMs, either set '-e canary_tidy_on_success=true', or call redeploy.yml with '-e canary=tidy' diff --git a/_dependencies/library/esxifree_guest.py b/_dependencies/library/esxifree_guest.py index 481dbbf9..5523a089 100644 --- a/_dependencies/library/esxifree_guest.py +++ b/_dependencies/library/esxifree_guest.py @@ -331,6 +331,7 @@ sample: None ''' +import os import time import re import json @@ -756,37 +757,6 @@ def create_vm(self, vmTemplate=None, annotation=None, datastore=None, hardware=N if len(bootDisks) == 1: return ("Boot disk parameters defined for cloned VM. 
Ambiguous requirement - not supported.") - newDisks = [newDisk for newDisk in disks if 'boot' not in newDisk] - for newDiskCount,newDisk in enumerate(newDisks): - scsiDiskIdx = newDiskCount + diskCount - disk_filename = self.name + "--" + newDisk['volname'] + ".vmdk" - - #Check if new disk already exists - if so, exit - try: - (stdin, stdout, stderr) = self.esxiCnx.exec_command("stat " + vmPathDest + "/" + disk_filename) - except IOError as e: - if 'src' in newDisk and newDisk['src'] is not None: - cloneSrcBackingFile = re.search('^\[(?P.*?)\] *(?P.*\/(?P(?P.*?)(?:--(?P.*?))?\.vmdk))$', newDisk['src']['backing_filename']) - try: - (stdin, stdout, stderr) = self.esxiCnx.exec_command("stat /vmfs/volumes/" + cloneSrcBackingFile.group('datastore') + "/" + cloneSrcBackingFile.group('fulldiskpath')) - except IOError as e: - return (cloneSrcBackingFile.group('fulldiskpath') + " not found!\n" + str(e)) - else: - if newDisk['src']['copy_or_move'] == 'copy': - self.esxiCnx.exec_command("vmkfstools -i /vmfs/volumes/" + cloneSrcBackingFile.group('datastore') + "/" + cloneSrcBackingFile.group('fulldiskpath') + " -d thin " + vmPathDest + "/" + disk_filename) - else: - self.esxiCnx.exec_command("vmkfstools -E /vmfs/volumes/" + cloneSrcBackingFile.group('datastore') + "/" + cloneSrcBackingFile.group('fulldiskpath') + " " + vmPathDest + "/" + disk_filename) - - else: - (stdin, stdout, stderr) = self.esxiCnx.exec_command("vmkfstools -c " + str(newDisk['size_gb']) + "G -d " + newDisk['type'] + " " + vmPathDest + "/" + disk_filename) - - vmxDict.update({"scsi0:" + str(scsiDiskIdx) + ".devicetype": "scsi-hardDisk"}) - vmxDict.update({"scsi0:" + str(scsiDiskIdx) + ".present": "TRUE"}) - vmxDict.update({"scsi0:" + str(scsiDiskIdx) + ".filename": disk_filename}) - diskCount = diskCount + 1 - else: - return (disk_filename + " already present!") - # write the vmx self.put_vmx(vmxDict, vmPathDest + "/" + self.name + ".vmx") @@ -794,7 +764,11 @@ def create_vm(self, vmTemplate=None, annotation=None, datastore=None, hardware=N (stdin, stdout, stderr) = self.esxiCnx.exec_command("vim-cmd solo/registervm " + vmPathDest + "/" + self.name + ".vmx") self.moid = int(stdout.readlines()[0]) - def update_vm(self, annotation=None): + # The logic used to update the disks is the same for an existing as a new VM. + self.update_vm(annotation=None, disks=disks) + + def update_vm(self, annotation, disks): + vmxPath, vmxDict = self.get_vmx(self.moid) if annotation: # Update the config (annotation) in the running VM response, cookies = self.soap_client.send_req('<_this type="VirtualMachine">' + str(self.moid) + '' + annotation + '') @@ -803,9 +777,46 @@ def update_vm(self, annotation=None): return ("Failed to ReconfigVM_Task: %s" % waitresp) # Now update the disk (should not be necessary, but for some reason, sometimes the ReconfigVM_Task does not flush config to disk). 
- vmxPath, vmxDict = self.get_vmx(self.moid) vmxDict.update({"annotation": annotation}) - self.put_vmx(vmxDict, vmxPath) + + if disks: + curDisks = [{"filename": vmxDict[scsiDisk], "volname": re.sub(r".*--([\w\d]+)\.vmdk", r"\1", vmxDict[scsiDisk])} for scsiDisk in sorted(vmxDict) if re.match(r"scsi0:\d\.filename", scsiDisk)] + curDisksCount = len(curDisks) + newDisks = [newDisk for newDisk in disks if ('boot' not in newDisk or newDisk['boot'] == False)] + for newDiskCount,newDisk in enumerate(newDisks): + scsiDiskIdx = newDiskCount + curDisksCount + disk_filename = self.name + "--" + newDisk['volname'] + ".vmdk" + + #Don't clone already existing disks + try: + (stdin, stdout, stderr) = self.esxiCnx.exec_command("stat " + os.path.dirname(vmxPath) + "/" + disk_filename) + except IOError as e: + if 'src' in newDisk and newDisk['src'] is not None: + cloneSrcBackingFile = re.search('^\[(?P.*?)\] *(?P.*\/(?P(?P.*?)(?:--(?P.*?))?\.vmdk))$', newDisk['src']['backing_filename']) + try: + (stdin, stdout, stderr) = self.esxiCnx.exec_command("stat /vmfs/volumes/" + cloneSrcBackingFile.group('datastore') + "/" + cloneSrcBackingFile.group('fulldiskpath')) + except IOError as e: + return (cloneSrcBackingFile.group('fulldiskpath') + " not found!\n" + str(e)) + else: + if newDisk['src']['copy_or_move'] == 'copy': + self.esxiCnx.exec_command("vmkfstools -i /vmfs/volumes/" + cloneSrcBackingFile.group('datastore') + "/" + cloneSrcBackingFile.group('fulldiskpath') + " -d thin " + os.path.dirname(vmxPath) + "/" + disk_filename) + else: + self.esxiCnx.exec_command("vmkfstools -E /vmfs/volumes/" + cloneSrcBackingFile.group('datastore') + "/" + cloneSrcBackingFile.group('fulldiskpath') + " " + os.path.dirname(vmxPath) + "/" + disk_filename) + + else: + (stdin, stdout, stderr) = self.esxiCnx.exec_command("vmkfstools -c " + str(newDisk['size_gb']) + "G -d " + newDisk['type'] + " " + os.path.dirname(vmxPath) + "/" + disk_filename) + + # if this is a new disk, not a restatement of an existing disk: + if len(curDisks) >= newDiskCount+2 and curDisks[newDiskCount+1]['volname'] == newDisk['volname']: + pass + else: + vmxDict.update({"scsi0:" + str(scsiDiskIdx) + ".devicetype": "scsi-hardDisk"}) + vmxDict.update({"scsi0:" + str(scsiDiskIdx) + ".present": "TRUE"}) + vmxDict.update({"scsi0:" + str(scsiDiskIdx) + ".filename": disk_filename}) + curDisksCount = curDisksCount + 1 + + self.put_vmx(vmxDict, vmxPath) + self.esxiCnx.exec_command("vim-cmd vmsvc/reload " + str(self.moid)) # def update_vm_pyvmomi(self, annotation=None): # if annotation: @@ -845,7 +856,7 @@ def main(): "name": {"type": "str"}, "moid": {"type": "str"}, "template": {"type": "str"}, - "state": {"type": "str", "default": 'present', "choices": ['absent', 'present', 'rebootguest', 'poweredon', 'poweredoff', 'shutdownguest']}, + "state": {"type": "str", "default": 'present', "choices": ['absent', 'present', 'unchanged', 'rebootguest', 'poweredon', 'poweredoff', 'shutdownguest']}, "force": {"type": "bool", "default": False}, "datastore": {"type": "str"}, "annotation": {"type": "str", "default": ""}, @@ -890,23 +901,24 @@ class cDummyAnsibleModule(): ## Clone VM params = { - "annotation": "{'lifecycle_state': 'current', 'Name': 'test-prod-sys-a0-1589979249', 'cluster_suffix': '1589979249', 'hosttype': 'sys', 'cluster_name': 'test-prod', 'env': 'prod', 'owner': 'dougal'}", + "annotation": None, + # "annotation": "{'lifecycle_state': 'current', 'Name': 'test-prod-sys-a0-1589979249', 'cluster_suffix': '1589979249', 'hosttype': 'sys', 'cluster_name': 'test-prod', 
'env': 'prod', 'owner': 'dougal'}", "cdrom": {"type": "client"}, "cloudinit_userdata": [], "customvalues": [], "datastore": "4tb-evo860-ssd", # "disks": [{"size_gb": 1, "type": "thin", "volname": "test"}], - "disks": [{"size_gb": 1, "type": "thin", "volname": "test_new"}, {"size_gb": 1, "type": "thin", "volname": "test_clone", "src": {"backing_filename": "[4tb-evo860-ssd] parsnip-dev-sys-a0-blue/parsnip-dev-sys-a0-blue--webdata.vmdk", "copy_or_move": "copy"}}], + "disks": [{"size_gb": 1, "type": "thin", "volname": "test", "src": {"backing_filename": "[4tb-evo860-ssd] testdisks-dev-sys-a0-1601204786/testdisks-dev-sys-a0-1601204786--test.vmdk", "copy_or_move": "move"}}], "force": False, "guest_id": "ubuntu-64", "hardware": {"memory_mb": "2048", "num_cpus": "2", "version": "15"}, "hostname": "192.168.1.3", "moid": None, - "name": "gold-alpine-test1", + "name": "testdisks-dev-sys-a0-1601205102", "networks": [{"cloudinit_netplan": {"ethernets": {"eth0": {"dhcp4": True}}}, "networkName": "VM Network", "virtualDev": "vmxnet3"}], "password": sys.argv[2], "state": "present", - "template": "gold-alpine", + "template": "gold-ubuntu2004-20200912150257", "username": "svc", "wait": True, "wait_timeout": 180 @@ -941,7 +953,16 @@ def fail_json(self, msg): module.fail_json(msg="If VM doesn't already exist, you must provide a name for it") # Check if the VM exists before continuing - if module.params['state'] == 'shutdownguest': + if module.params['state'] == 'unchanged': + if iScraper.moid is not None: + updateVmResult = iScraper.update_vm(annotation=module.params['annotation'], disks=module.params['disks']) + if updateVmResult != None: + module.fail_json(msg=updateVmResult) + module.exit_json(changed=True, meta={"msg": "Shutdown " + iScraper.name + ": " + str(iScraper.moid)}) + else: + module.fail_json(msg="VM doesn't exist.") + + elif module.params['state'] == 'shutdownguest': if iScraper.moid: iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') time_s = 60 @@ -958,19 +979,27 @@ def fail_json(self, msg): elif module.params['state'] == 'poweredon': if iScraper.moid: - response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') - if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': - module.fail_json(msg="Failed to PowerOnVM_Task") - module.exit_json(changed=True, meta={"msg": "Powered-on " + iScraper.name + ": " + str(iScraper.moid)}) + (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/power.getstate " + str(iScraper.moid)) + if re.search('Powered off', stdout.read().decode('UTF-8')) is not None: + response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') + if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': + module.fail_json(msg="Failed to PowerOnVM_Task") + module.exit_json(changed=True, meta={"msg": "Powered-on " + iScraper.name + ": " + str(iScraper.moid)}) + else: + module.exit_json(changed=False, meta={"msg": "VM " + iScraper.name + ": already on."}) else: module.fail_json(msg="VM doesn't exist.") elif module.params['state'] == 'poweredoff': if iScraper.moid: - response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') - if 
iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': - module.fail_json(msg="Failed to PowerOffVM_Task") - module.exit_json(changed=True, meta={"msg": "Powered-off " + iScraper.name + ": " + str(iScraper.moid)}) + (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/power.getstate " + str(iScraper.moid)) + if re.search('Powered on', stdout.read().decode('UTF-8')) is not None: + response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') + if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': + module.fail_json(msg="Failed to PowerOffVM_Task") + module.exit_json(changed=True, meta={"msg": "Powered-off " + iScraper.name + ": " + str(iScraper.moid)}) + else: + module.exit_json(changed=False, meta={"msg": "VM " + iScraper.name + ": already off."}) else: module.fail_json(msg="VM doesn't exist.") @@ -991,11 +1020,11 @@ def fail_json(self, msg): if iScraper.moid: (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/power.getstate " + str(iScraper.moid)) if re.search('Powered off', stdout.read().decode('UTF-8')) is not None: - response, cookies = iScraper.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') + response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': module.fail_json(msg="Failed to PowerOnVM_Task") else: - response, cookies = iScraper.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') + response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': module.fail_json(msg="Failed to RebootGuest") module.exit_json(changed=True, meta={"msg": "Rebooted " + iScraper.name + ": " + str(iScraper.moid)}) @@ -1021,47 +1050,47 @@ def fail_json(self, msg): if createVmResult != None: module.fail_json(msg="Failed to create_vm: %s" % createVmResult) + else: + updateVmResult = iScraper.update_vm(annotation=module.params['annotation'], disks=module.params['disks']) + if updateVmResult != None: + module.fail_json(msg=updateVmResult) + + (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/power.getstate " + str(iScraper.moid)) + if re.search('Powered off', stdout.read().decode('UTF-8')) is not None: response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') - waitresp = iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) - if waitresp != 'success': - module.fail_json(msg="Failed to PowerOnVM_Task: %s" % waitresp) + if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': + module.fail_json(msg="Failed to PowerOnVM_Task") - isChanged = True + isChanged = 
True - ## Delete the cloud-init config - iScraper.delete_cloudinit() + ## Delete the cloud-init config + iScraper.delete_cloudinit() - if "wait" in module.params and module.params['wait']: - time_s = int(module.params['wait_timeout']) - while time_s > 0: - (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/get.guest " + str(iScraper.moid)) - guest_info = stdout.read().decode('UTF-8') - vm_params = re.search('\s*hostName\s*=\s*\"?(?P.*?)\"?,.*\n\s*ipAddress\s*=\s*\"?(?P.*?)\"?,.*', guest_info) - if vm_params and vm_params.group('vm_ip') != "" and vm_params.group('vm_hostname') != "": - break - else: - time.sleep(1) - time_s = time_s - 1 - - module.exit_json(changed=isChanged, - guest_info=guest_info, - hostname=vm_params.group('vm_hostname'), - ip_address=vm_params.group('vm_ip'), - name=module.params['name'], - moid=iScraper.moid) - else: - module.exit_json(changed=isChanged, - hostname="", - ip_address="", - name=module.params['name'], - moid=iScraper.moid) + ## Wait for IP address and hostname to be advertised by the VM (via open-vm-tools) + if "wait" in module.params and module.params['wait']: + time_s = int(module.params['wait_timeout']) + while time_s > 0: + (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/get.guest " + str(iScraper.moid)) + guest_info = stdout.read().decode('UTF-8') + vm_params = re.search('\s*hostName\s*=\s*\"?(?P.*?)\"?,.*\n\s*ipAddress\s*=\s*\"?(?P.*?)\"?,.*', guest_info) + if vm_params and vm_params.group('vm_ip') != "" and vm_params.group('vm_hostname') != "": + break + else: + time.sleep(1) + time_s = time_s - 1 + module.exit_json(changed=isChanged, + guest_info=guest_info, + hostname=vm_params.group('vm_hostname'), + ip_address=vm_params.group('vm_ip'), + name=module.params['name'], + moid=iScraper.moid) else: - updateVmResult = iScraper.update_vm(annotation=module.params['annotation']) - if updateVmResult != None: - module.fail_json(msg=updateVmResult) - - module.exit_json(changed=True, name=module.params['name'], moid=iScraper.moid) + module.exit_json(changed=isChanged, + hostname="", + ip_address="", + name=module.params['name'], + moid=iScraper.moid) else: module.exit_json(changed=False, meta={"msg": "No state."}) diff --git a/create/tasks/aws.yml b/create/tasks/aws.yml index af0ec685..65192a18 100644 --- a/create/tasks/aws.yml +++ b/create/tasks/aws.yml @@ -104,23 +104,6 @@ delay: 3 retries: 300 with_items: "{{r__ec2_vol.results}}" - vars: - cluster_hosts_target_denormalised_by_volume: | - {% set res = [] -%} - {%- for cht_host in cluster_hosts_target -%} - {%- for autovol in cht_host.auto_volumes -%} - {%- set elem = {} -%} - {%- for cht_host_key in cht_host.keys() -%} - {%- if cht_host_key != 'auto_volumes' -%} - {%- set _ = elem.update({cht_host_key: cht_host[cht_host_key]}) -%} - {%- else -%} - {%- set _ = elem.update({'auto_volume': autovol}) -%} - {%- endif -%} - {%- endfor -%} - {%- set _ = res.append(elem) -%} - {%- endfor -%} - {%- endfor -%} - {{res}} - name: create/aws | Tag the EBS volumes diff --git a/create/tasks/esxifree.yml b/create/tasks/esxifree.yml index 4b6f2f4e..ecf54c89 100644 --- a/create/tasks/esxifree.yml +++ b/create/tasks/esxifree.yml @@ -1,6 +1,6 @@ --- -- name: Create vmware instances from template +- name: create/esxifree | Create vmware instances from template esxifree_guest: hostname: "{{ cluster_vars.esxi_ip }}" username: "{{ cluster_vars.username }}" @@ -28,7 +28,7 @@ async: 7200 poll: 0 -- name: Wait for instance creation to complete +- name: create/esxifree | Wait for instance 
creation to complete async_status: jid: "{{ item.ansible_job_id }}" register: esxi_jobs diff --git a/create/tasks/main.yml b/create/tasks/main.yml index d6e95706..5af875dc 100644 --- a/create/tasks/main.yml +++ b/create/tasks/main.yml @@ -23,3 +23,21 @@ - name: "Create {{cluster_vars.type}} cluster" include_tasks: "{{cluster_vars.type}}.yml" + vars: + # auto_volumes are normally a list of volumes per host. We cannot iterate this within a non-nested ansible loop(with_items), so we denormalise/ flatten it into a new one-dimensional list, which has each volume, as well as all the parent host information. + cluster_hosts_target_denormalised_by_volume: | + {% set res = [] -%} + {%- for cht_host in cluster_hosts_target -%} + {%- for autovol in cht_host.auto_volumes -%} + {%- set elem = {} -%} + {%- for cht_host_key in cht_host.keys() -%} + {%- if cht_host_key != 'auto_volumes' -%} + {%- set _ = elem.update({cht_host_key: cht_host[cht_host_key]}) -%} + {%- else -%} + {%- set _ = elem.update({'auto_volume': autovol}) -%} + {%- endif -%} + {%- endfor -%} + {%- set _ = res.append(elem) -%} + {%- endfor -%} + {%- endfor -%} + {{res}} diff --git a/redeploy/__common/tasks/poweroff_vms.yml b/redeploy/__common/tasks/poweroff_vms.yml index 38e06772..3b8fccf0 100644 --- a/redeploy/__common/tasks/poweroff_vms.yml +++ b/redeploy/__common/tasks/poweroff_vms.yml @@ -64,7 +64,7 @@ username: "{{ cluster_vars.username }}" password: "{{ cluster_vars.password }}" name: "{{item.name}}" - state: present + state: unchanged annotation: "{{ item.tagslabels | combine({'maintenance_mode': 'true'}) }}" with_items: "{{ hosts_to_stop }}" @@ -76,6 +76,5 @@ name: "{{item.name}}" state: shutdownguest with_items: "{{ hosts_to_stop }}" - when: cluster_vars.type == "esxifree" when: hosts_to_stop | length \ No newline at end of file diff --git a/redeploy/__common/tasks/poweron_vms.yml b/redeploy/__common/tasks/poweron_vms.yml index 363a9aee..da950fff 100644 --- a/redeploy/__common/tasks/poweron_vms.yml +++ b/redeploy/__common/tasks/poweron_vms.yml @@ -18,7 +18,6 @@ run_once: true when: cluster_vars.type == "aws" - - name: poweron_vms | Power-on GCP GCE VM(s) asynchronously block: - name: poweron_vms | Power-on GCP GCE VM(s) @@ -45,7 +44,6 @@ with_items: "{{r__gcp_compute_instance.results}}" when: cluster_vars.type == "gcp" - - name: poweron_vms | Power-on esxifree VM(s) block: - name: poweron_vms | Power-on esxifree VM(s) @@ -54,6 +52,7 @@ username: "{{ cluster_vars.username }}" password: "{{ cluster_vars.password }}" name: "{{item.name}}" - state: powered-on + state: poweredon + with_items: "{{ hosts_to_start }}" when: cluster_vars.type == "esxifree" when: hosts_to_start | length \ No newline at end of file diff --git a/redeploy/__common/tasks/set_lifecycle_state_label.yml b/redeploy/__common/tasks/set_lifecycle_state_label.yml index 093a50be..33b97b34 100644 --- a/redeploy/__common/tasks/set_lifecycle_state_label.yml +++ b/redeploy/__common/tasks/set_lifecycle_state_label.yml @@ -36,7 +36,7 @@ username: "{{ cluster_vars.username }}" password: "{{ cluster_vars.password }}" name: "{{item.name}}" - state: present + state: "unchanged" annotation: "{{ item.tagslabels | combine({'lifecycle_state': new_state}) }}" with_items: "{{ hosts_to_relabel }}" when: cluster_vars.type == "esxifree" From 31b23c708b141c038366ab26840974d12689df3c Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Thu, 1 Oct 2020 18:42:08 +0100 Subject: [PATCH 09/58] Extra protection for redeploy/_scheme_rmvm_keepdisk_rollback when disks have changed. 
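
The extra protection is a preflight assertion that every EBS disk already attached to an existing cluster member is the same as (or a subset of) the auto_volumes defined for the matching target host, so no existing data disk can be silently dropped by a keepdisk redeploy. As a rough illustration only (not part of the role, and with purely hypothetical hostnames and device names), the Jinja2 assertion amounts to the following Python check:

    def strip_suffix(hostname):
        # Mirrors regex_replace('-(?!.*-).*'): drop the trailing "-<cluster_suffix>".
        return hostname.rsplit("-", 1)[0]

    def existing_disks_are_subset(cluster_hosts_target, ec2_instances):
        for target in cluster_hosts_target:
            target_devices = {vol["device_name"] for vol in target.get("auto_volumes", [])}
            for instance in ec2_instances:
                if strip_suffix(instance["name"]) != strip_suffix(target["hostname"]):
                    continue
                # The first block device is the boot volume; only secondary disks are compared.
                existing_devices = {bdm["device_name"] for bdm in instance["block_device_mappings"][1:]}
                if not existing_devices.issubset(target_devices):
                    return False
        return True

    # Illustrative data only:
    target = [{"hostname": "mycluster-dev-sys-a0-1601205102",
               "auto_volumes": [{"device_name": "/dev/sdf"}, {"device_name": "/dev/sdg"}]}]
    old = [{"name": "mycluster-dev-sys-a0-1601204786",
            "block_device_mappings": [{"device_name": "/dev/sda1"}, {"device_name": "/dev/sdf"}]}]
    print(existing_disks_are_subset(target, old))  # True: /dev/sdf can be re-attached; /dev/sdg is simply a new disk
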
--- _dependencies/tasks/main.yml | 22 +++++----- create/tasks/main.yml | 2 +- .../tasks/by_hosttype_by_host.yml | 22 +++++----- .../tasks/main.yml | 31 +------------- .../tasks/preflight.yml | 40 +++++++++++++++++++ 5 files changed, 67 insertions(+), 50 deletions(-) create mode 100644 redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml diff --git a/_dependencies/tasks/main.yml b/_dependencies/tasks/main.yml index 8cf92f20..ad6b4d29 100644 --- a/_dependencies/tasks/main.yml +++ b/_dependencies/tasks/main.yml @@ -18,23 +18,23 @@ - name: Preflight check block: - - assert: { that: "ansible_version.full is version_compare('2.9', '>=')", msg: "Ansible >=2.9 required." } - - assert: { that: "app_name is defined and app_name != ''", msg: "Please define app_name" } - - assert: { that: "app_class is defined and app_class != ''", msg: "Please define app_class" } - - assert: { that: "clusterid is defined and cluster_vars is defined", msg: "Please define clusterid" } - - assert: { that: "buildenv is defined and cluster_vars[buildenv] is defined", msg: "Please define buildenv" } + - assert: { that: "ansible_version.full is version_compare('2.9', '>=')", fail_msg: "Ansible >=2.9 required." } + - assert: { that: "app_name is defined and app_name != ''", fail_msg: "Please define app_name" } + - assert: { that: "app_class is defined and app_class != ''", fail_msg: "Please define app_class" } + - assert: { that: "clusterid is defined and cluster_vars is defined", fail_msg: "Please define clusterid" } + - assert: { that: "buildenv is defined and cluster_vars[buildenv] is defined", fail_msg: "Please define buildenv" } ## Tags/ labels must be compatible with GCP and AWS - check everything that goes into a label. - - assert: { that: "release_version is regex('^[a-z\\d\\-_]{0,63}$')", msg: "Please ensure release_version ({{release_version}}) is in the set [a-z\\d\\-_], and <63 characters long." } + - assert: { that: "release_version is regex('^[a-z\\d\\-_]{0,63}$')", fail_msg: "Please ensure release_version ({{release_version}}) is in the set [a-z\\d\\-_], and <63 characters long." } when: release_version is defined - - assert: { that: "cluster_suffix is regex('^[a-z\\d\\-_]{0,63}$')", msg: "Please ensure cluster_suffix ({{cluster_suffix}}) is in the set[a-z\\d\\-_], and <63 characters long." } + - assert: { that: "cluster_suffix is regex('^[a-z\\d\\-_]{0,63}$')", fail_msg: "Please ensure cluster_suffix ({{cluster_suffix}}) is in the set[a-z\\d\\-_], and <63 characters long." } when: cluster_suffix is defined - assert: { that: "'{%- for label in cluster_vars.custom_tagslabels -%}{% if not cluster_vars.custom_tagslabels[label] is regex('^[a-z\\d\\-_]{0,63}$') %}{{label}}: {{cluster_vars.custom_tagslabels[label]}}{% endif %}{%- endfor -%}' == ''", fail_msg: "Please ensure all cluster_vars.custom_tagslabels are in the set [a-z\\d\\-_], and <63 characters long." } when: "'custom_tagslabels' in cluster_vars" - assert: { that: "'{%- for hosttype in cluster_vars[buildenv].hosttype_vars -%}{% if ('version' in cluster_vars[buildenv].hosttype_vars[hosttype]) and (not cluster_vars[buildenv].hosttype_vars[hosttype].version is regex('^[a-z\\d\\-_]{0,63}$')) %}{{cluster_vars[buildenv].hosttype_vars[hosttype].version}}{% endif %}{%- endfor -%}' == ''", fail_msg: "Please ensure cluster_vars[{{buildenv}}].hosttype_vars[hosttype].version is in the set [a-z\\d\\-_], and <63 characters long." 
} - - assert: { that: "cluster_vars[buildenv] | json_query(\"hosttype_vars.*.auto_volumes[] | [?contains(`/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde`, device_name)]\") | length == 0", msg: "/dev/sd[b-e] are not allowed as device_name in AWS cluster_vars[buildenv].hosttype_vars. Please start at /dev/sdf." } - when: cluster_vars.type == "aws" - - - assert: { that: "(cluster_vars.assign_public_ip == 'yes' and cluster_vars.inventory_ip == 'public') or (cluster_vars.inventory_ip == 'private')", msg: "If inventory_ip=='public', 'assign_public_ip' must be 'yes'" } + - assert: { that: "(cluster_vars.assign_public_ip == 'yes' and cluster_vars.inventory_ip == 'public') or (cluster_vars.inventory_ip == 'private')", fail_msg: "If inventory_ip=='public', 'assign_public_ip' must be 'yes'" } when: cluster_vars.type == "gcp" or cluster_vars.type == "aws" + + - assert: { that: "cluster_vars[buildenv] | json_query(\"hosttype_vars.*.auto_volumes[] | [?contains(`/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde`, device_name)]\") | length == 0", fail_msg: "/dev/sd[b-e] are not allowed as device_name in AWS cluster_vars[buildenv].hosttype_vars. Please start at /dev/sdf." } + when: cluster_vars.type == "aws" diff --git a/create/tasks/main.yml b/create/tasks/main.yml index 5af875dc..dfbb3db4 100644 --- a/create/tasks/main.yml +++ b/create/tasks/main.yml @@ -24,7 +24,7 @@ - name: "Create {{cluster_vars.type}} cluster" include_tasks: "{{cluster_vars.type}}.yml" vars: - # auto_volumes are normally a list of volumes per host. We cannot iterate this within a non-nested ansible loop(with_items), so we denormalise/ flatten it into a new one-dimensional list, which has each volume, as well as all the parent host information. + # auto_volumes are normally a list of volumes per host. We cannot iterate this within a non-nested ansible loop(with_items), so we denormalise/ flatten it into a new one-dimensional list, of each volume, as well as all the parent host information. 
cluster_hosts_target_denormalised_by_volume: | {% set res = [] -%} {%- for cht_host in cluster_hosts_target -%} diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml index bcc20497..a1f88ea4 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml @@ -2,15 +2,19 @@ - debug: msg="by_hosttype_by_host | Attempting to redeploy {{host_to_redeploy.hostname}}" -- name: by_hosttype_by_host | run predeleterole role - include_role: - name: "{{predeleterole}}" - when: predeleterole is defined and predeleterole != "" - -- name: by_hosttype_by_host | Power off old VM - include_role: - name: clusterverse/redeploy/__common - tasks_from: poweroff_vms.yml +- name: stop/ remove previous instance + block: + - name: by_hosttype_by_host | run predeleterole role + include_role: + name: "{{predeleterole}}" + vars: + hosts_to_remove: "{{ hosts_to_stop }}" + when: predeleterole is defined and predeleterole != "" + + - name: by_hosttype_by_host | Power off old VM + include_role: + name: clusterverse/redeploy/__common + tasks_from: poweroff_vms.yml vars: _host_to_redeploy_nosuffix: "{{host_to_redeploy.hostname | regex_replace('-(?!.*-).*')}}" #Remove the cluster_suffix from the hostname hosts_to_stop: "{{ cluster_hosts_state | to_json | from_json | json_query(\"[?tagslabels.lifecycle_state!='current' && starts_with(name, '\" + _host_to_redeploy_nosuffix + \"')]\") }}" diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml index 328599e7..65fb69d7 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml @@ -1,34 +1,7 @@ --- -- name: Preflight check - block: - - block: - - name: Preflight check | ec2_instance_info - ec2_instance_info: - filters: { "instance-state-name": [ "running", "stopped" ], "tag:cluster_name": "{{cluster_name}}", "tag:lifecycle_state": "current" } - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{cluster_vars.region}}" - register: r__ec2_instance_info - - - assert: { that: "_invalid_disks | length == 0", msg: "EBS disks with a device_name of /dev/sd[b-e] cannot be reattached to a new instance (an AWS limitation) [found on: {{ _invalid_disks | join(',')}}]. To replace these, you must use a redeploy scheme that copies the disks." } - vars: { _invalid_disks: "{{ r__ec2_instance_info.instances | json_query(\"[?block_device_mappings[?contains(`/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde`, device_name)]].tags.Name\") }}" } - when: cluster_vars.type == "aws" - - - assert: { that: "_scheme_rmvm_keepdisk_only__copy_or_move is defined and _scheme_rmvm_keepdisk_only__copy_or_move in ['copy', 'move']", msg: "ERROR - _scheme_rmvm_keepdisk_only__copy_or_move must be defined and set to either 'copy' or 'move'" } - when: cluster_vars.type == "esxifree" - - - assert: { that: "non_current_hosts | length == 0", msg: "ERROR - All VMs must be in the 'current' lifecycle_state. 
Those not [{{non_current_hosts | join(',')}}]" } - vars: - non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" - when: (canary=="start" or canary=="none") and (ignore_lifecycle_check is not defined or ignore_lifecycle_check==false) - - - assert: - that: "{{chs_hosts | difference(cht_hosts) | length==0}}" - msg: "Cannot use this scheme to redeploy to smaller cluster; [{{ chs_hosts | join(',') }}] > [{{ cht_hosts | join(',') }}]" - vars: - cht_hosts: "{{ cluster_hosts_target | json_query(\"[].hostname\") | map('regex_replace', '-(?!.*-).*') | list }}" - chs_hosts: "{{ cluster_hosts_state | json_query(\"[].name\") | map('regex_replace', '-(?!.*-).*') | list }}" +- name: Include preflight checks/ assertions. + include_tasks: preflight.yml - name: Redeploy by hosttype; rollback on fail block: diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml new file mode 100644 index 00000000..600f955d --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml @@ -0,0 +1,40 @@ +--- + +- name: Preflight check + block: + - block: + - name: Preflight check | get ec2_instance_info for current disk information + ec2_instance_info: + filters: { "instance-state-name": [ "running", "stopped" ], "tag:cluster_name": "{{cluster_name}}", "tag:lifecycle_state": "current" } + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{cluster_vars.region}}" + register: r__ec2_instance_info + + - assert: { that: "_invalid_disks | length == 0", fail_msg: "EBS disks with a device_name of /dev/sd[b-e] cannot be reattached to a new instance (an AWS limitation) [found on: {{ _invalid_disks | join(',')}}]. To replace these, you must use a redeploy scheme that copies the disks." 
} + vars: { _invalid_disks: "{{ r__ec2_instance_info.instances | json_query(\"[?block_device_mappings[?contains(`/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde`, device_name)]].tags.Name\") }}" } + + - assert: { that: "ec2_disks_is_subset_of_target_disks|bool==false", fail_msg: "Existing EBS disks must have be same as (or a subset of) the auto_volumes defined in your cluster_vars definition.", success_msg: "Success - Existing EBS disks are the same as (or a subset of) the auto_volumes defined in your cluster_vars definition" } + vars: + ec2_disks_is_subset_of_target_disks: |- + {%- set testloop = namespace(is_not_subset=false) -%} + {%- for cht_host in cluster_hosts_target | json_query('[].{hostname: hostname, discs: auto_volumes[].device_name}') -%} + {%- for ec2_host in r__ec2_instance_info.instances | json_query('[?tags.lifecycle_state != "current"].{hostname: tags.Name, discs: block_device_mappings[].device_name | [1:]}') -%} + {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == ec2_host.hostname | regex_replace('-(?!.*-).*') -%} + {%- if not ec2_host.discs is subset(cht_host.discs) -%} + {%- set testloop.is_not_subset = true -%} + {%- endif -%} + {%- endif -%} + {%- endfor -%} + {%- endfor -%} + {{ testloop.is_not_subset }} + when: cluster_vars.type == "aws" + + - assert: { that: "_scheme_rmvm_keepdisk_only__copy_or_move is defined and _scheme_rmvm_keepdisk_only__copy_or_move in ['copy', 'move']", fail_msg: "ERROR - _scheme_rmvm_keepdisk_only__copy_or_move must be defined and set to either 'copy' or 'move'" } + when: cluster_vars.type == "esxifree" + + - assert: { that: "non_current_hosts | length == 0", fail_msg: "ERROR - All VMs must be in the 'current' lifecycle_state. Those not [{{non_current_hosts | join(',')}}]" } + vars: { non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" } + when: (canary=="start" or canary=="none") and (ignore_lifecycle_check is not defined or ignore_lifecycle_check==false) + + - assert: { that: "(cluster_hosts_state | selectattr('tagslabels.lifecycle_state', '==', 'current') | list | length) == (cluster_hosts_target | length)", fail_msg: "Cannot use this scheme to redeploy to a different-sized cluster" } From 8b5c07176cd9bebd68d796414a06cbea3fd5fc7a Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Sun, 4 Oct 2020 23:34:52 +0100 Subject: [PATCH 10/58] Create a new redeploy scheme (_scheme_rmvm_keepdisk_rollback), which moves disks between old and new VMs, saving a lot of time. + Replace the disks_auto logic to use the actual mapping of AWS device name to OS device name. This is necessary to support the disk moving scheme, and is much more reliable; include the 'ebsmap' module from https://github.com/dseeley/ebsmap to do this. + Mount disks using UUID instead of device string, as with nvme disks, the device names can change between restarts (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/nvme-ebs-volumes.html#identify-nvme-ebs-device). 
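
The ebsmap module maps the EBS device name as defined in AWS (e.g. /dev/sdf) to the block device actually presented to the OS, which on nvme-based instances can otherwise change between restarts. The mount-by-UUID part can be illustrated with a rough, standalone sketch (the device path, mountpoint and filesystem type below are assumptions for illustration only; the role's own logic lives in config/tasks/disks_auto_aws.yml):

    import subprocess

    device = "/dev/nvme1n1"      # hypothetical attached EBS data volume
    mountpoint = "/media/mysvc"  # hypothetical mountpoint
    fstype = "ext4"

    # Look up the filesystem UUID; unlike /dev/nvme* names, it is stable across reboots.
    uuid = subprocess.check_output(["blkid", "-s", "UUID", "-o", "value", device], text=True).strip()

    # Mount by UUID, and persist it in /etc/fstab so device-name reordering is harmless.
    subprocess.run(["mount", f"UUID={uuid}", mountpoint], check=True)
    with open("/etc/fstab", "a") as fstab:
        fstab.write(f"UUID={uuid} {mountpoint} {fstype} defaults 0 0\n")
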
--- EXAMPLE/group_vars/_skel/cluster_vars.yml | 17 +- .../group_vars/test_aws_euw1/cluster_vars.yml | 17 +- .../group_vars/test_gcp_euw1/cluster_vars.yml | 3 + README.md | 29 +- _dependencies/library/ebsmap.py | 270 ++++++++++++++++++ _dependencies/library/ebsmap__LICENSE | 53 ++++ _dependencies/library/ebsmap__README.md | 24 ++ _dependencies/tasks/main.yml | 19 +- _dependencies/vars_plugins/cli_facts.py | 2 +- .../tasks/get_cluster_hosts_state.yml | 39 --- config/tasks/disks_auto_aws.yml | 109 +++++++ config/tasks/disks_auto_aws_nvme.yml | 112 -------- ...{disks_auto.yml => disks_auto_generic.yml} | 36 +-- config/tasks/main.yml | 10 +- config/tasks/metricbeat.yml | 0 create/tasks/aws.yml | 117 +++++--- create/tasks/main.yml | 18 ++ redeploy/__common/tasks/poweron_vms.yml | 1 - .../tasks/set_lifecycle_state_label.yml | 1 + ..._diskinfo_to_cluster_hosts_target__aws.yml | 33 +++ .../tasks/by_hosttype.yml | 23 ++ .../tasks/by_hosttype_by_host.yml | 50 ++++ .../tasks/main.yml | 115 ++++++++ .../tasks/preflight.yml | 39 +++ redeploy/tasks/main.yml | 2 +- 25 files changed, 896 insertions(+), 243 deletions(-) create mode 100644 _dependencies/library/ebsmap.py create mode 100644 _dependencies/library/ebsmap__LICENSE create mode 100644 _dependencies/library/ebsmap__README.md create mode 100644 config/tasks/disks_auto_aws.yml delete mode 100644 config/tasks/disks_auto_aws_nvme.yml rename config/tasks/{disks_auto.yml => disks_auto_generic.yml} (53%) mode change 100755 => 100644 config/tasks/metricbeat.yml create mode 100644 redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml create mode 100644 redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype.yml create mode 100644 redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml create mode 100644 redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml create mode 100644 redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml diff --git a/EXAMPLE/group_vars/_skel/cluster_vars.yml b/EXAMPLE/group_vars/_skel/cluster_vars.yml index 5f2029f9..e1081886 100644 --- a/EXAMPLE/group_vars/_skel/cluster_vars.yml +++ b/EXAMPLE/group_vars/_skel/cluster_vars.yml @@ -1,5 +1,7 @@ --- +redeploy_schemes_supported: [] + # GCP credentials gcp_credentials_file: "{{ lookup('env','GCP_CREDENTIALS') | default('/dev/null', true) }}" gcp_credentials_json: "{{ lookup('file', gcp_credentials_file) | default({'project_id': 'GCP_CREDENTIALS__NOT_SET','client_email': 'GCP_CREDENTIALS__NOT_SET'}, true) }}" @@ -70,13 +72,14 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within # sandbox: # hosttype_vars: # sys: {vms_by_az: {a: 1, b: 1, c: 1}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} -# # sysnobeats: {vms_by_az: {a: 1, b: 0, c: 0}, skip_beat_install:true, flavor: t3a.nano, version: "{{sysnobeats_version | default('')}}", auto_volumes: [] -# # sysdisks: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdb", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdc", mountpoint: "/var/log/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 3, ephemeral: False, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdd", mountpoint: "/var/log/mysvc3", fstype: "ext4", "volume_type": "gp2", 
"volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} -# # sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdb", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true }]} -# # hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, auto_volumes: [], nvme: {volumes: [{mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}, {mountpoint: "/var/log/mysvc2", fstype: ext4, volume_size: 2500}]} } -# # hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, auto_volumes: [], nvme: {volumes: [{mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}, {mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } } -# # hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} -# # hosthdd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: h1.2xlarge, auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} +# #sysdisks2: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} +# #sysdisks3: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} +# #sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} +# #hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}] } } +# #hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: 
"/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# #hosthdd_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } +# #hosthdd_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# #hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, ephemeral: 0, encrypted: True, "delete_on_termination": true}]} # aws_access_key: "" # aws_secret_key: "" # vpc_name: "test{{buildenv}}" diff --git a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml index e262bf7d..810f4be2 100644 --- a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml @@ -1,8 +1,11 @@ --- +redeploy_schemes_supported: ['_scheme_addallnew_rmdisk_rollback', '_scheme_addnewvm_rmdisk_rollback', '_scheme_rmvm_rmdisk_only', '_scheme_rmvm_keepdisk_rollback'] + #redeploy_scheme: _scheme_addallnew_rmdisk_rollback #redeploy_scheme: _scheme_addnewvm_rmdisk_rollback #redeploy_scheme: _scheme_rmvm_rmdisk_only +#redeploy_scheme: _scheme_rmvm_keepdisk_rollback app_name: "test" # The name of the application cluster (e.g. 'couchbase', 'nginx'); becomes part of cluster_name. app_class: "test" # The class of application (e.g. 
'database', 'webserver'); becomes part of the fqdn @@ -76,12 +79,14 @@ cluster_vars: sandbox: hosttype_vars: sys: {vms_by_az: {a: 1, b: 1, c: 1}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} -# sysdisks: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdb", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdc", mountpoint: "/var/log/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 3, ephemeral: False, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdd", mountpoint: "/var/log/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} -# sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdb", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true }]} -# hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, auto_volumes: [], nvme: {volumes: [{mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}, {mountpoint: "/var/log/mysvc2", fstype: ext4, volume_size: 2500}]} } -# hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, auto_volumes: [], nvme: {volumes: [{mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}, {mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } } -# hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} -# hosthdd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: h1.2xlarge, auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} +# sysdisks2: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} +# sysdisks3: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} +# sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 
0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} +# hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}] } } +# hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# hosthdd_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } +# hosthdd_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, ephemeral: 0, encrypted: True, "delete_on_termination": true}]} aws_access_key: "" aws_secret_key: "" vpc_name: "test{{buildenv}}" diff --git a/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml index 4b08de80..e4390373 100644 --- a/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml @@ -4,9 +4,12 @@ gcp_credentials_file: "{{ lookup('env','GCP_CREDENTIALS') | default('/dev/null', true) }}" gcp_credentials_json: "{{ lookup('file', gcp_credentials_file) | default({'project_id': 'GCP_CREDENTIALS__NOT_SET','client_email': 'GCP_CREDENTIALS__NOT_SET'}, true) }}" +redeploy_schemes_supported: ['_scheme_addallnew_rmdisk_rollback', '_scheme_addnewvm_rmdisk_rollback', '_scheme_rmvm_rmdisk_only'] + #redeploy_scheme: _scheme_addallnew_rmdisk_rollback #redeploy_scheme: _scheme_addnewvm_rmdisk_rollback #redeploy_scheme: _scheme_rmvm_rmdisk_only +#redeploy_scheme: _scheme_rmvm_keepdisk_rollback app_name: "test" # The name of the application cluster (e.g. 'couchbase', 'nginx'); becomes part of cluster_name. app_class: "test" # The class of application (e.g. 
'database', 'webserver'); becomes part of the fqdn diff --git a/README.md b/README.md index f6a00e5f..d963d3d9 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ A full-lifecycle, immutable cloud infrastructure cluster management **role**, us + **Scale (e.g. add a node):** If you change the config yaml and rerun the deploy, new nodes will be added. + **Redeploy (e.g. up-version):** If you need to up-version, the `redeploy.yml` playbook will replace each node in turn, (with optional callbacks), and rollback if any failures occur. -**clusterverse** is designed to deploy base-vm infrastructure that underpins cluster-based infrastructure, for example, Couchbase, or Cassandra. +**clusterverse** is designed to manage base-vm infrastructure that underpins cluster-based infrastructure, for example, Couchbase, Kafka, Elasticsearch, or Cassandra. ## Contributing Contributions are welcome and encouraged. Please see [CONTRIBUTING.md](https://github.com/sky-uk/clusterverse/blob/master/CONTRIBUTING.md) for details. @@ -35,7 +35,8 @@ To active the pipenv: ### DNS DNS is optional. If unset, no DNS names will be created. If required, you will need a DNS zone delegated to one of the following: + Bind9 -+ Route53 ++ AWS Route53 ++ Google Cloud DNS Credentials to the DNS server will also be required. These are specified in the `cluster_vars.yml` file described below. @@ -70,13 +71,15 @@ Credentials can be encrypted inline in the playbooks using [ansible-vault](https ## Usage **clusterverse** is an Ansible _role_, and as such must be imported into your \/roles directory. There is a full-featured example in the [/EXAMPLE](https://github.com/sky-uk/clusterverse/tree/master/EXAMPLE) subdirectory. -To import the role into your project, create a `requirements.yml` file containing: +To import the role into your project, create a [`requirements.yml`](https://github.com/sky-uk/clusterverse/blob/master/EXAMPLE/requirements.yml) file containing: ``` - src: https://github.com/sky-uk/clusterverse - version: master ## or hash, or version + version: master ## branch, hash, or tag name: clusterverse ``` -To install the role into a project's `roles` directory: +If you use a `cluster.yml` file similar to the example found in [EXAMPLE/cluster.yml](https://github.com/sky-uk/clusterverse/blob/master/EXAMPLE/cluster.yml), clusterverse will be installed automatically on each run of the playbook. + +To install it manually: + `ansible-galaxy install -r requirements.yml -p //roles/` @@ -110,8 +113,8 @@ The role is designed to run in two modes: + **It assumes a resilient deployment (it can tolerate one node being deleted from the cluster). There is no rollback in case of failure** + For each node in the cluster: + Run `predeleterole` - + Delete the node - + Run the main cluster.yml, which forces the missing node to be redeployed. Run with the same parameters as for the main playbook. + + Delete/ terminate the node (note, this is _irreversible_). + + Run the main cluster.yml (with the same parameters as for the main playbook), which forces the missing node to be redeployed (the `cluster_suffix` remains the same). + If the process fails at any point: + No further VMs will be deleted or rebuilt - the playbook stops. + **_scheme_addnewvm_rmdisk_rollback** @@ -130,3 +133,15 @@ The role is designed to run in two modes: + The old VMs are stopped. 
+ If the process fails for any reason, the old VMs are reinstated, and the new VMs stopped (rollback) + To delete the old VMs, either set '-e canary_tidy_on_success=true', or call redeploy.yml with '-e canary=tidy' + + **_scheme_rmvm_keepdisk_rollback (AWS only so far)** + + Redeploys the nodes one by one, and moves the secondary (non-root) disks from the old to the new (note, only non-ephemeral disks can be moved). + + _Cluster topology must remain identical. More disks may be added, but none may change or be removed._ + + **It assumes a resilient deployment (it can tolerate one node being removed from the cluster).** + + For each node in the cluster: + + Run `predeleterole` + + Stop the node + + Detach the disks from the old node + + Run the main cluster.yml to create a new node + + Attach disks to new node + + If the process fails for any reason, the old VMs are reinstated (and the disks reattached to the old nodes), and the new VMs are stopped (rollback) + + To delete the old VMs, either set '-e canary_tidy_on_success=true', or call redeploy.yml with '-e canary=tidy' diff --git a/_dependencies/library/ebsmap.py b/_dependencies/library/ebsmap.py new file mode 100644 index 00000000..dbd6d717 --- /dev/null +++ b/_dependencies/library/ebsmap.py @@ -0,0 +1,270 @@ +# Copyright 2020 Dougal Seeley +# https://github.com/dseeley/ebsmap + +# Copyright 2017 Amazon.com, Inc. and its affiliates. All Rights Reserved. +# Licensed under the MIT License. See the LICENSE accompanying this file +# for the specific language governing permissions and limitations under +# the License. + +from __future__ import (absolute_import, division, print_function) + +__metaclass__ = type + +DOCUMENTATION = ''' +--- +module: ebsmap +version_added: 1.0.0 +short_description: ebsmap +description: + - Map the EBS device name as defined in AWS (e.g. /dev/sdf) with the volume provided to the OS +author: + - Dougal Seeley + - Amazon.com inc. 
+''' + +EXAMPLES = ''' +- name: Get the nvme map information + ebsmap: + become: yes + register: r__ebsmap + +- name: ebsmap + debug: msg={{ebsmap}} +''' + +RETURN = ''' +"device_map": [ + { + "FSTYPE": "ext4", + "MOUNTPOINT": "/media/mysvc", + "NAME": "nvme1n1", + "PARTLABEL": "", + "SERIAL": "vol0c2c47ee4516063e9", + "TYPE": "disk", + "UUID": "c3630dbe-042e-44e5-ac67-54fa1c9e4cd2", + "device_name_aws": "/dev/sdf", + "device_name_os": "/dev/nvme1n1", + "volume_id": "vol-0c2c47ee4516063e9" + }, + { + "FSTYPE": "", + "MOUNTPOINT": "", + "NAME": "nvme0n1", + "PARTLABEL": "", + "SERIAL": "vol0b05e48d5677db81a", + "TYPE": "disk", + "UUID": "", + "device_name_aws": "/dev/sda1", + "device_name_os": "/dev/nvme0n1", + "volume_id": "vol-0b05e48d5677db81a" + }, + { + "FSTYPE": "ext4", + "MOUNTPOINT": "/", + "NAME": "nvme0n1p1", + "PARTLABEL": "", + "SERIAL": "", + "TYPE": "part", + "UUID": "96ec7adb-9d94-41c0-96a5-d6992c9d5f20", + "device_name_aws": "/dev/sda1", + "device_name_os": "/dev/nvme0n1p1", + "volume_id": "vol-0b05e48d5677db81a" + } + +"device_map": [ + { + "FSTYPE": "", + "MOUNTPOINT": "", + "NAME": "xvda", + "PARTLABEL": "", + "SERIAL": "", + "TYPE": "disk", + "UUID": "", + "device_name_aws": "/dev/sda", + "device_name_os": "/dev/xvda" + }, + { + "FSTYPE": "ext4", + "MOUNTPOINT": "/", + "NAME": "xvda1", + "PARTLABEL": "", + "SERIAL": "", + "TYPE": "part", + "UUID": "96ec7adb-9d94-41c0-96a5-d6992c9d5f20", + "device_name_aws": "/dev/sda1", + "device_name_os": "/dev/xvda1" + } +''' + +from ctypes import * +from fcntl import ioctl +import subprocess +import sys +import json +import re + +try: + from ansible.module_utils.basic import AnsibleModule + from ansible.errors import AnsibleError + from ansible.utils.display import Display +except: + pass + +NVME_ADMIN_IDENTIFY = 0x06 +NVME_IOCTL_ADMIN_CMD = 0xC0484E41 +AMZN_NVME_VID = 0x1D0F +AMZN_NVME_EBS_MN = "Amazon Elastic Block Store" + + +class nvme_admin_command(Structure): + _pack_ = 1 + _fields_ = [("opcode", c_uint8), # op code + ("flags", c_uint8), # fused operation + ("cid", c_uint16), # command id + ("nsid", c_uint32), # namespace id + ("reserved0", c_uint64), + ("mptr", c_uint64), # metadata pointer + ("addr", c_uint64), # data pointer + ("mlen", c_uint32), # metadata length + ("alen", c_uint32), # data length + ("cdw10", c_uint32), + ("cdw11", c_uint32), + ("cdw12", c_uint32), + ("cdw13", c_uint32), + ("cdw14", c_uint32), + ("cdw15", c_uint32), + ("reserved1", c_uint64)] + + +class nvme_identify_controller_amzn_vs(Structure): + _pack_ = 1 + _fields_ = [("bdev", c_char * 32), # block device name + ("reserved0", c_char * (1024 - 32))] + + +class nvme_identify_controller_psd(Structure): + _pack_ = 1 + _fields_ = [("mp", c_uint16), # maximum power + ("reserved0", c_uint16), + ("enlat", c_uint32), # entry latency + ("exlat", c_uint32), # exit latency + ("rrt", c_uint8), # relative read throughput + ("rrl", c_uint8), # relative read latency + ("rwt", c_uint8), # relative write throughput + ("rwl", c_uint8), # relative write latency + ("reserved1", c_char * 16)] + + +class nvme_identify_controller(Structure): + _pack_ = 1 + _fields_ = [("vid", c_uint16), # PCI Vendor ID + ("ssvid", c_uint16), # PCI Subsystem Vendor ID + ("sn", c_char * 20), # Serial Number + ("mn", c_char * 40), # Module Number + ("fr", c_char * 8), # Firmware Revision + ("rab", c_uint8), # Recommend Arbitration Burst + ("ieee", c_uint8 * 3), # IEEE OUI Identifier + ("mic", c_uint8), # Multi-Interface Capabilities + ("mdts", c_uint8), # Maximum Data Transfer Size + 
("reserved0", c_uint8 * (256 - 78)), + ("oacs", c_uint16), # Optional Admin Command Support + ("acl", c_uint8), # Abort Command Limit + ("aerl", c_uint8), # Asynchronous Event Request Limit + ("frmw", c_uint8), # Firmware Updates + ("lpa", c_uint8), # Log Page Attributes + ("elpe", c_uint8), # Error Log Page Entries + ("npss", c_uint8), # Number of Power States Support + ("avscc", c_uint8), # Admin Vendor Specific Command Configuration + ("reserved1", c_uint8 * (512 - 265)), + ("sqes", c_uint8), # Submission Queue Entry Size + ("cqes", c_uint8), # Completion Queue Entry Size + ("reserved2", c_uint16), + ("nn", c_uint32), # Number of Namespaces + ("oncs", c_uint16), # Optional NVM Command Support + ("fuses", c_uint16), # Fused Operation Support + ("fna", c_uint8), # Format NVM Attributes + ("vwc", c_uint8), # Volatile Write Cache + ("awun", c_uint16), # Atomic Write Unit Normal + ("awupf", c_uint16), # Atomic Write Unit Power Fail + ("nvscc", c_uint8), # NVM Vendor Specific Command Configuration + ("reserved3", c_uint8 * (704 - 531)), + ("reserved4", c_uint8 * (2048 - 704)), + ("psd", nvme_identify_controller_psd * 32), # Power State Descriptor + ("vs", nvme_identify_controller_amzn_vs)] # Vendor Specific + + +class ebs_nvme_device: + def __init__(self, device): + self.device = device + self.ctrl_identify() + + def _nvme_ioctl(self, id_response, id_len): + admin_cmd = nvme_admin_command(opcode=NVME_ADMIN_IDENTIFY, addr=id_response, alen=id_len, cdw10=1) + with open(self.device, "rt") as nvme: + ioctl(nvme, NVME_IOCTL_ADMIN_CMD, admin_cmd) + + def ctrl_identify(self): + self.id_ctrl = nvme_identify_controller() + self._nvme_ioctl(addressof(self.id_ctrl), sizeof(self.id_ctrl)) + if self.id_ctrl.vid != AMZN_NVME_VID or self.id_ctrl.mn.decode().strip() != AMZN_NVME_EBS_MN: + raise TypeError("[ERROR] Not an EBS device: '{0}'".format(self.device)) + + def get_volume_id(self): + vol = self.id_ctrl.sn.decode() + if vol.startswith("vol") and vol[3] != "-": + vol = "vol-" + vol[3:] + return vol + + def get_block_device(self, stripped=False): + device = self.id_ctrl.vs.bdev.decode() + if stripped and device.startswith("/dev/"): + device = device[5:] + return device + + +def main(): + if not (len(sys.argv) > 1 and sys.argv[1] == "console"): + module = AnsibleModule(argument_spec={}, supports_check_mode=True) + else: + # For testing without Ansible (e.g on Windows) + class cDummyAnsibleModule(): + params = {} + + def exit_json(self, changed, **kwargs): + print(changed, json.dumps(kwargs, sort_keys=True, indent=4, separators=(',', ': '))) + + def warn(self, msg): + print("[WARNING]: " + msg) + + def fail_json(self, msg): + print("Failed: " + msg) + exit(1) + + module = cDummyAnsibleModule() + + # Get all existing block volumes by key=value, then parse this into a dictionary (which excludes non disk and partition block types, e.g. ram, loop). 
+ lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,PARTLABEL,MOUNTPOINT,SERIAL', '-P']).decode().rstrip().split('\n') + os_device_names = [dict((map(lambda x: x.strip("\""), sub.split("="))) for sub in dev.split('\" ') if '=' in sub) for dev in lsblk_devices] + os_device_names = [dev for dev in os_device_names if dev['TYPE'] in ['disk', 'part', 'lvm']] + + for os_device in os_device_names: + os_device_path = "/dev/" + os_device['NAME'] + if os_device['NAME'].startswith("nvme"): + try: + dev = ebs_nvme_device(os_device_path) + except FileNotFoundError as e: + module.fail_json(msg=os_device_path + ": FileNotFoundError" + str(e)) + except OSError as e: + module.warn(u"%s is not an nvme device." % os_device_path) + else: + os_device.update({"device_name_os": os_device_path, "device_name_aws": '/dev/' + dev.get_block_device(stripped=True).rstrip(), "volume_id": dev.get_volume_id()}) + elif os_device['NAME'].startswith("xvd"): + os_device.update({"device_name_os": os_device_path, "device_name_aws": '/dev/' + re.sub(r'xvd(.*)', r'sd\1', os_device['NAME'])}) + else: + os_device.update({"device_name_os": os_device_path, "device_name_aws": ""}) + + module.exit_json(changed=False, device_map=os_device_names) + + +if __name__ == '__main__': + main() diff --git a/_dependencies/library/ebsmap__LICENSE b/_dependencies/library/ebsmap__LICENSE new file mode 100644 index 00000000..55138771 --- /dev/null +++ b/_dependencies/library/ebsmap__LICENSE @@ -0,0 +1,53 @@ +BSD 3-Clause License + +Copyright (c) 2020, Dougal Seeley +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +--- + +Copyright 2017 Amazon.com, Inc. and its affiliates. All Rights Reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is furnished +to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/_dependencies/library/ebsmap__README.md b/_dependencies/library/ebsmap__README.md new file mode 100644 index 00000000..f38b360c --- /dev/null +++ b/_dependencies/library/ebsmap__README.md @@ -0,0 +1,24 @@ +# ebsmap + +This is an Ansible module that is able to map AWS EBS device names (including NVME devices) to the host device names. + +## Credits +The bulk of the heavy lifting is nvme ioctl commands written by AWS for their Amazon Linux AMIs. See: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/nvme-ebs-volumes.html + +## Execution +This can be run as an Ansible module (needs root): +```yaml +- name: Get the nvme map information + ebsmap: + become: yes + register: r__ebsmap + +- name: ebsmap + debug: msg={{ebsmap}} + +``` + +or from the console: +```bash +python3 ./ebsmap.py console +``` \ No newline at end of file diff --git a/_dependencies/tasks/main.yml b/_dependencies/tasks/main.yml index 8f25ca87..2d300f62 100644 --- a/_dependencies/tasks/main.yml +++ b/_dependencies/tasks/main.yml @@ -18,20 +18,23 @@ - name: Preflight check block: - - assert: { that: "ansible_version.full is version_compare('2.9', '>=')", msg: "Ansible >=2.9 required." } - - assert: { that: "app_name is defined and app_name != ''", msg: "Please define app_name" } - - assert: { that: "app_class is defined and app_class != ''", msg: "Please define app_class" } - - assert: { that: "clusterid is defined and cluster_vars is defined", msg: "Please define clusterid" } - - assert: { that: "buildenv is defined and cluster_vars[buildenv] is defined", msg: "Please define buildenv" } + - assert: { that: "ansible_version.full is version_compare('2.9', '>=')", fail_msg: "Ansible >=2.9 required." } + - assert: { that: "app_name is defined and app_name != ''", fail_msg: "Please define app_name" } + - assert: { that: "app_class is defined and app_class != ''", fail_msg: "Please define app_class" } + - assert: { that: "clusterid is defined and cluster_vars is defined", fail_msg: "Please define clusterid" } + - assert: { that: "buildenv is defined and cluster_vars[buildenv] is defined", fail_msg: "Please define buildenv" } ## Tags/ labels must be compatible with GCP and AWS - check everything that goes into a label. - - assert: { that: "release_version is regex('^[a-z\\d\\-_]{0,63}$')", msg: "Please ensure release_version ({{release_version}}) is in the set [a-z\\d\\-_], and <63 characters long." 
} + - assert: { that: "release_version is regex('^[a-z\\d\\-_]{0,63}$')", fail_msg: "Please ensure release_version ({{release_version}}) is in the set [a-z\\d\\-_], and <63 characters long." } when: release_version is defined - - assert: { that: "cluster_suffix is regex('^[a-z\\d\\-_]{0,63}$')", msg: "Please ensure cluster_suffix ({{cluster_suffix}}) is in the set[a-z\\d\\-_], and <63 characters long." } + - assert: { that: "cluster_suffix is regex('^[a-z\\d\\-_]{0,63}$')", fail_msg: "Please ensure cluster_suffix ({{cluster_suffix}}) is in the set[a-z\\d\\-_], and <63 characters long." } when: cluster_suffix is defined - assert: { that: "'{%- for label in cluster_vars.custom_tagslabels -%}{% if not cluster_vars.custom_tagslabels[label] is regex('^[a-z\\d\\-_]{0,63}$') %}{{label}}: {{cluster_vars.custom_tagslabels[label]}}{% endif %}{%- endfor -%}' == ''", fail_msg: "Please ensure all cluster_vars.custom_tagslabels are in the set [a-z\\d\\-_], and <63 characters long." } when: "'custom_tagslabels' in cluster_vars" - assert: { that: "'{%- for hosttype in cluster_vars[buildenv].hosttype_vars -%}{% if ('version' in cluster_vars[buildenv].hosttype_vars[hosttype]) and (not cluster_vars[buildenv].hosttype_vars[hosttype].version is regex('^[a-z\\d\\-_]{0,63}$')) %}{{cluster_vars[buildenv].hosttype_vars[hosttype].version}}{% endif %}{%- endfor -%}' == ''", fail_msg: "Please ensure cluster_vars[{{buildenv}}].hosttype_vars[hosttype].version is in the set [a-z\\d\\-_], and <63 characters long." } - - assert: { that: "(cluster_vars.assign_public_ip == 'yes' and cluster_vars.inventory_ip == 'public') or (cluster_vars.inventory_ip == 'private')", msg: "If inventory_ip=='public', 'assign_public_ip' must be 'yes'" } + - assert: { that: "(cluster_vars.assign_public_ip == 'yes' and cluster_vars.inventory_ip == 'public') or (cluster_vars.inventory_ip == 'private')", fail_msg: "If inventory_ip=='public', 'assign_public_ip' must be 'yes'" } when: cluster_vars.type == "gcp" or cluster_vars.type == "aws" + + - assert: { that: "cluster_vars[buildenv] | json_query(\"hosttype_vars.*.auto_volumes[] | [?contains(`/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde`, device_name) && volume_type!='ephemeral']\") | length == 0", fail_msg: "device_names /dev/sd[b-e] are only allowed for ephemeral volumes in AWS cluster_vars[buildenv].hosttype_vars. Please start non-ephemeral devices at /dev/sdf." } + when: cluster_vars.type == "aws" diff --git a/_dependencies/vars_plugins/cli_facts.py b/_dependencies/vars_plugins/cli_facts.py index 5cd07ccf..ea113fa7 100644 --- a/_dependencies/vars_plugins/cli_facts.py +++ b/_dependencies/vars_plugins/cli_facts.py @@ -7,7 +7,7 @@ DOCUMENTATION = ''' --- -cars: cli_facts +vars: argv, cliargs short_description: Expose the system ARGV and CLI arguments as facts in plays. 
version_added: "2.8" author: "Dougal Seeley" diff --git a/cluster_hosts/tasks/get_cluster_hosts_state.yml b/cluster_hosts/tasks/get_cluster_hosts_state.yml index 3571111a..e2cb9023 100644 --- a/cluster_hosts/tasks/get_cluster_hosts_state.yml +++ b/cluster_hosts/tasks/get_cluster_hosts_state.yml @@ -47,45 +47,6 @@ _cluster_hosts_state__urlregion: "{{r__gcp_compute_instance_info.results | json_query(\"[?resources[?labels]].resources[].{name: name, regionzone: zone, tagslabels: labels, instance_id: id, instance_state: status}\") }}" when: cluster_vars.type == "gcp" -- name: get_cluster_hosts_state/vmware | Get VMware cluster_hosts_state - block: - - name: get_cluster_hosts_state/vmware | Get existing VMware instance info - vmware_vm_info: - username: "{{ cluster_vars.esxi_username }}" - password: "{{ cluster_vars.esxi_password }}" - hostname: "{{ cluster_vars.esxi_ip }}" - validate_certs: no - register: r__vmware_vm_info - delegate_to: localhost - run_once: true - - - name: get_cluster_hosts_state/vmware | Get existing VMware instance facts - vmware_guest_info: - username: "{{ cluster_vars.esxi_username }}" - password: "{{ cluster_vars.esxi_password }}" - hostname: "{{ cluster_vars.esxi_ip }}" - validate_certs: no - datacenter: None - uuid: "{{item.uuid}}" - with_items: "{{ r__vmware_vm_info.virtual_machines | to_json | from_json | json_query(\"[?starts_with(guest_name, '\"+cluster_name+\"')]\") }}" - register: r__vmware_guest_info - delegate_to: localhost - run_once: true - - # Convert the annotations into a proper dictionary within the facts - - name: get_cluster_hosts_state/vmware | update r__vmware_guest_info result with json-parsed annotations - set_fact: - r__vmware_guest_info: | - {% set res = r__vmware_guest_info -%} - {%- for result in r__vmware_guest_info.results -%} - {%- set _ = result.instance.update({'annotation': result.instance.annotation | json_loads_loose}) -%} - {%- endfor -%} - {{ res }} - - - name: get_cluster_hosts_state/vmware | Set cluster_hosts_state - set_fact: - cluster_hosts_state: "{{ r__vmware_guest_info.results | json_query(\"[].{name: instance.hw_name, regionzone: None, tagslabels: instance.annotation, instance_id: instance.moid, instance_state: instance.hw_power_status}\") }}" - when: cluster_vars.type == "esxifree" - name: get_cluster_hosts_state | cluster_hosts_state debug: msg="{{cluster_hosts_state}}" diff --git a/config/tasks/disks_auto_aws.yml b/config/tasks/disks_auto_aws.yml new file mode 100644 index 00000000..bc2f8859 --- /dev/null +++ b/config/tasks/disks_auto_aws.yml @@ -0,0 +1,109 @@ +--- + +- name: disks_auto_aws | auto_volumes + debug: msg={{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].auto_volumes }} + +- name: disks_auto_aws | cluster_hosts_target(inventory_hostname) + debug: msg={{ (cluster_hosts_target | selectattr('hostname', '==', inventory_hostname) | list | first)['auto_volumes'] }} + + +- name: disks_auto_aws | Mount volumes as individual disks + block: + - name: disks_auto_aws | Get the nvme information (pre-filesystem create) + ebsmap: + become: yes + register: r__ebsmap + + - name: disks_auto_aws | r__ebsmap (pre-filesystem create) + debug: msg={{r__ebsmap}} + + - name: disks_auto_aws | Create filesystem (partitionless) + become: yes + filesystem: + fstype: "{{ item.fstype }}" + dev: "{{ (r__ebsmap.device_map | selectattr('device_name_aws', '==', item.device_name) | list | last)['device_name_os'] }}" + loop: "{{auto_vols}}" + + - name: disks_auto_aws | Get the nvme information (post-filesystem create), 
to get the block IDs for mounting + ebsmap: + become: yes + register: r__ebsmap + + - name: disks_auto_aws | r__ebsmap (post-filesystem create) + debug: msg={{r__ebsmap}} + + - name: disks_auto_aws | Mount created filesytem(s) persistently + become: yes + mount: + path: "{{ item.mountpoint }}" + src: "UUID={{ (r__ebsmap.device_map | selectattr('device_name_aws', '==', item.device_name) | list | last)['UUID'] }}" + fstype: "{{ item.fstype }}" + state: mounted + opts: _netdev + loop: "{{auto_vols}}" + + - name: disks_auto_aws | change ownership of mountpoint (if set) + become: yes + file: + path: "{{ item.mountpoint }}" + state: directory + mode: "{{ item.perms.mode | default(omit)}}" + owner: "{{ item.perms.owner | default(omit)}}" + group: "{{ item.perms.group | default(omit)}}" + loop: "{{auto_vols}}" + when: (auto_vols | map(attribute='mountpoint') | list | unique | count == auto_vols | map(attribute='mountpoint') | list | count) + vars: + auto_vols: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].auto_volumes }}" + + +# The following block mounts all nvme attached volumes that have a single, common mountpoint, by creating a logical volume +- name: disks_auto_aws | Mount nvme volumes in a single mountpoint through LV/VG + block: + - name: disks_auto_aws | Install logical volume management tooling. (yum - RedHat/CentOS) + become: true + yum: + name: "lvm*" + state: present + when: ansible_os_family == 'RedHat' + + - name: disks_auto_aws | Get the nvme information (pre-filesystem create) + ebsmap: + become: yes + register: r__ebsmap + + - name: disks_auto_aws | r__ebsmap (pre-filesystem create) + debug: msg={{r__ebsmap}} + + - name: disks_auto_aws | Create a volume group from all nvme devices + become: yes + lvg: + vg: "{{ hosttype_vars.lvmparams.vg_name }}" + pvs: "{{ r__ebsmap.device_map | json_query(\"[?device_name_aws && contains('\" + auto_vol_device_names + \"', device_name_aws)].device_name_os\") | join(',')}}" + vars: + auto_vol_device_names: "{{hosttype_vars.auto_volumes | map(attribute='device_name') | sort | join(',')}}" + + - name: disks_auto_aws | Create a logical volume from volume group + become: yes + lvol: + vg: "{{ hosttype_vars.lvmparams.vg_name }}" + lv: "{{ hosttype_vars.lvmparams.lv_name }}" + size: "{{ hosttype_vars.lvmparams.lv_size }}" + + - name: disks_auto_aws | Create filesystem(s) on attached nvme volume(s) + become: yes + filesystem: + fstype: "{{ hosttype_vars.auto_volumes | map(attribute='fstype') | list | unique | join('') }}" + dev: "/dev/{{ hosttype_vars.lvmparams.vg_name }}/{{ hosttype_vars.lvmparams.lv_name }}" + force: no + + - name: disks_auto_aws | Mount created filesytem(s) persistently + become: yes + mount: + path: "{{ hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | unique | join('') }}" + src: "/dev/{{ hosttype_vars.lvmparams.vg_name }}/{{ hosttype_vars.lvmparams.lv_name }}" + fstype: "{{ hosttype_vars.auto_volumes | map(attribute='fstype') | list | unique | join('') }}" + state: mounted + opts: _netdev + when: ('lvmparams' in hosttype_vars) and (hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | unique | count == 1) and (hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | count >= 2) and (hosttype_vars.auto_volumes | map(attribute='fstype') | list | unique | count == 1) + vars: + hosttype_vars: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype] }}" diff --git a/config/tasks/disks_auto_aws_nvme.yml b/config/tasks/disks_auto_aws_nvme.yml 
deleted file mode 100644 index c1623f2a..00000000 --- a/config/tasks/disks_auto_aws_nvme.yml +++ /dev/null @@ -1,112 +0,0 @@ ---- -#- debug: msg={{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme_volumes }} -#- debug: msg={{ ansible_facts.devices }} -- block: - - name: autodisks_nvme | Get unused block devices - set_fact: - block_devices: "{{ {'dev': item, 'size_b': (ansible_facts.devices[item].sectors|int) * (ansible_facts.devices[item].sectorsize|int)} }}" - with_items: "{{ ansible_facts.devices }}" - register: block_devices_list - when: item | regex_search("nvme") and ansible_facts.devices[item].partitions == {} - - - name: autodisks_nvme | Create unused block devices list - set_fact: - lsblk_volumes: "{{ block_devices_list.results | map(attribute='ansible_facts.block_devices') | select('defined') | list }}" - - - name: autodisks_nvme | lsblk_volumes - debug: msg={{ lsblk_volumes }} - - - name: autodisks_nvme | Create 'nvmevols' fact that contains a list of available host nvme devices (lsblk) mapped to the mountpoints defined in cluster_vars. Handles single mounting points with LV/VG - set_fact: - nvmevols: | - {% set res = [] -%} - {% set tmp_blkvols = lsblk_volumes -%} - {%- for nvmevol in cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.volumes -%} - {%- set blkvolloop = namespace(break=false) -%} - {%- for blkvol in tmp_blkvols if not blkvolloop.break -%} - {%- if (nvmevol.volume_size*1000000000|int) == (blkvol.size_b|int) -%} - {%- set _ = res.extend([ {'device': '/dev/'+blkvol.dev, 'mountpoint': nvmevol.mountpoint, 'fstype': nvmevol.fstype, 'perms': autovol.perms | default({})}]) -%} - {%- set _ = tmp_blkvols.remove(blkvol) -%} - {%- set blkvolloop.break = true -%} - {%- endif -%} - {%- endfor -%} - {%- endfor -%} - {{ res }} - - - name: autodisks_nvme | nvme mountpoints - debug: msg={{ nvmevols | map(attribute='mountpoint') | list | unique }} - - # The following block mounts all nvme attached volumes that have individual mountpoints - - name: autodisks_nvme | Mount nvme volumes with different mountpoints - block: - - name: autodisks_nvme | Create filesystem(s) on attached nvme volume(s) - become: yes - filesystem: - fstype: "{{ item.fstype }}" - dev: "{{ item.device }}" - force: no - with_items: "{{ nvmevols }}" - - - name: autodisks_nvme | Mount nvme created filesytem(s) persistently - become: yes - mount: - path: "{{ item.mountpoint }}" - src: "{{ item.device }}" - fstype: "{{ item.fstype }}" - state: mounted - opts: _netdev - with_items: "{{ nvmevols }}" - - - name: autodisks_nvme | change ownership of mountpoint (if set) - become: yes - file: - path: "{{ item.mountpoint }}" - state: directory - mode: "{{ item.perms.mode | default(omit)}}" - owner: "{{ item.perms.owner | default(omit)}}" - group: "{{ item.perms.group | default(omit)}}" - with_items: "{{ nvmevols }}" - when: (nvmevols | map(attribute='mountpoint') | list | unique | count == nvmevols | map(attribute='mountpoint') | list | count) - - # The following block mounts all nvme attached volumes that have a single, common mountpoint, by creating a logical volume - - name: autodisks_nvme | Mount nvme volumes in a single mountpoint through LV/VG - block: - #- debug: msg={{nvmevols | map(attribute='device') | join(',')}} - - - name: autodisks_nvme | Install logical volume management tooling. 
(yum - RedHat/CentOS) - become: true - yum: - name: "lvm*" - state: present - when: ansible_os_family == 'RedHat' - - - name: autodisks_nvme | Create a volume group from all nvme devices - become: yes - lvg: - vg: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.vg_name }}" - pvs: "{{nvmevols | map(attribute='device') | join(',')}}" - - - name: autodisks_nvme | Create a logical volume from volume group - become: yes - lvol: - vg: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.vg_name }}" - lv: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.lv_name }}" - size: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.lv_size }}" - - - name: autodisks_nvme | Create filesystem(s) on attached nvme volume(s) - become: yes - filesystem: - fstype: "{{ nvmevols | map(attribute='fstype') | list | unique | join('') }}" - dev: "/dev/{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.vg_name }}/{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.lv_name }}" - force: no - - - name: autodisks_nvme | Mount created filesytem(s) persistently - become: yes - mount: - path: "{{ nvmevols | map(attribute='mountpoint') | list | unique | join('') }}" - src: "/dev/{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.vg_name }}/{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.lv_name }}" - fstype: "{{ nvmevols | map(attribute='fstype') | list | unique | join('') }}" - state: mounted - opts: _netdev - when: (nvmevols | map(attribute='mountpoint') | list | unique | count == 1) and (nvmevols | map(attribute='mountpoint') | list | count >= 2) and (nvmevols | map(attribute='fstype') | list | unique | count == 1) - when: (cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.volumes is defined) and (cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.volumes|length > 0) and (cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.volumes != "[]") diff --git a/config/tasks/disks_auto.yml b/config/tasks/disks_auto_generic.yml similarity index 53% rename from config/tasks/disks_auto.yml rename to config/tasks/disks_auto_generic.yml index 4ce7db03..cde01eac 100644 --- a/config/tasks/disks_auto.yml +++ b/config/tasks/disks_auto_generic.yml @@ -1,43 +1,45 @@ --- + #- debug: msg={{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].auto_volumes }} #- debug: msg={{ ansible_facts.devices }} + - block: - - name: autodisks | Get unused block devices + - name: disks_auto_generic | Get unused block devices set_fact: block_devices: "{{ {'dev': item, 'size_b': (ansible_facts.devices[item].sectors|int) * (ansible_facts.devices[item].sectorsize|int)} }}" with_items: "{{ ansible_facts.devices }}" register: block_devices_list when: item | regex_search("nvme|[xvsh]+d") and ansible_facts.devices[item].partitions == {} - - name: autodisks | Create unused block devices list + - name: disks_auto_generic | Create unused block devices list set_fact: lsblk_volumes: "{{ block_devices_list.results | map(attribute='ansible_facts.block_devices') | select('defined') | list }}" - - name: autodisks | lsblk_volumes + - name: disks_auto_generic | lsblk_volumes debug: msg={{ lsblk_volumes }} -- name: 
autodisks | Create 'hostvols' fact that contains a list of available host devices (lsblk) mapped to the mountpoints defined in cluster_vars. Allow for multiple disks with same size. +- name: disks_auto_generic | Create 'hostvols' fact that contains a list of available host devices (lsblk) mapped to the mountpoints defined in cluster_vars. Allow for multiple disks with same size. set_fact: hostvols: | {% set res = [] -%} {% set tmp_blkvols = lsblk_volumes -%} - {%- for autovol in cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].auto_volumes -%} - {%- set blkvolloop = namespace(break=false) -%} - {%- for blkvol in tmp_blkvols if not blkvolloop.break -%} - {%- if (autovol.volume_size*1073741824|int) == (blkvol.size_b|int) -%} - {%- set _ = res.extend([ {'device': '/dev/'+blkvol.dev, 'mountpoint': autovol.mountpoint, 'fstype': autovol.fstype, 'perms': autovol.perms | default({})}]) -%} - {%- set _ = tmp_blkvols.remove(blkvol) -%} - {%- set blkvolloop.break = true -%} - {%- endif -%} - {%- endfor -%} + {% set inventory_hostname__no_suffix = inventory_hostname | regex_replace('-(?!.*-).*') -%} + {%- for autovol in cluster_hosts_target | to_json | from_json | json_query('[?starts_with(hostname, \'' + inventory_hostname__no_suffix + '\')].auto_volumes[]') -%} + {%- set blkvolloop = namespace(break=false) -%} + {%- for blkvol in tmp_blkvols if not blkvolloop.break -%} + {%- if (autovol.volume_size*1073741824|int) == (blkvol.size_b|int) -%} + {%- set _ = res.extend([ {'device': '/dev/'+blkvol.dev, 'mountpoint': autovol.mountpoint, 'fstype': autovol.fstype, 'perms': autovol.perms | default({})}]) -%} + {%- set _ = tmp_blkvols.remove(blkvol) -%} + {%- set blkvolloop.break = true -%} + {%- endif -%} {%- endfor -%} {{ res }} -#- name: autodisks | hostvols +#- name: disks_auto_generic | hostvols # debug: msg={{hostvols}} # Create partition-less filesystems. -- name: autodisks | Create filesystem(s) on attached volume(s) +- name: disks_auto_generic | Create filesystem(s) on attached volume(s) become: yes filesystem: fstype: "{{ item.fstype }}" @@ -49,7 +51,7 @@ delay: 1 until: created_filesystem is not failed -- name: autodisks | Mount created filesytem(s) persistently +- name: disks_auto_generic | Mount created filesytem(s) persistently become: yes mount: path: "{{ item.mountpoint }}" @@ -59,7 +61,7 @@ opts: _netdev with_items: "{{ hostvols }}" -- name: autodisks | change ownership of mountpoint (if set) +- name: disks_auto_generic | change ownership of mountpoint (if set) become: yes file: path: "{{ item.mountpoint }}" diff --git a/config/tasks/main.yml b/config/tasks/main.yml index e842014a..5a23db5e 100644 --- a/config/tasks/main.yml +++ b/config/tasks/main.yml @@ -52,13 +52,13 @@ mode: 0755 when: (static_journal is defined and static_journal|bool) - # Run this *before* the general auto_volumes tasks, because we need them to be eliminated before we try to mount the other disks. 
-- name: Attach nvme_volumes - include_tasks: disks_auto_aws_nvme.yml +- name: Create partition table, format and attach volumes - AWS + include_tasks: disks_auto_aws.yml when: cluster_vars.type == "aws" -- name: Attach auto_volumes - include_tasks: disks_auto.yml +- name: Create partition table, format and attach volumes - generic + include_tasks: disks_auto_generic.yml + when: cluster_vars.type != "aws" - name: install prometheus node exporter daemon include_tasks: prometheus_node_exporter.yml diff --git a/config/tasks/metricbeat.yml b/config/tasks/metricbeat.yml old mode 100755 new mode 100644 diff --git a/create/tasks/aws.yml b/create/tasks/aws.yml index e779dc28..4aee2484 100644 --- a/create/tasks/aws.yml +++ b/create/tasks/aws.yml @@ -1,5 +1,8 @@ --- +- name: cluster_hosts_target_denormalised_by_volume + debug: msg="{{cluster_hosts_target_denormalised_by_volume}}" + - name: create/aws | Create AWS security group ec2_group: name: "{{ cluster_name }}-sg" @@ -20,6 +23,16 @@ - name: create/aws | Create EC2 VMs asynchronously and wait for completion block: + - name: create/aws | Detach volumes from previous instances (during the _scheme_rmvm_keepdisk_rollback redeploy, we only redeploy one host at a time, and it is already powered off) + ec2_vol: + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{cluster_vars.region}}" + id: "{{item.auto_volume.src.volume_id}}" + instance: None + loop: "{{ cluster_hosts_target_denormalised_by_volume }}" + when: "'src' in item.auto_volume" + - name: create/aws | Create EC2 VMs asynchronously ec2: aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" @@ -35,9 +48,8 @@ wait: yes instance_tags: "{{ _instance_tags | combine(cluster_vars.custom_tagslabels | default({})) }}" termination_protection: "{{cluster_vars[buildenv].termination_protection}}" - volumes: "{{ item.auto_volumes | default([]) }}" - count_tag: - Name: "{{item.hostname}}" + volumes: "{{ item.auto_volumes | selectattr('src', 'undefined') | list | default([]) }}" + count_tag: { Name: "{{item.hostname}}" } exact_count: 1 vars: _instance_tags: @@ -51,56 +63,68 @@ maintenance_mode: "true" release: "{{ release_version }}" lifecycle_state: "current" - with_items: "{{cluster_hosts_target}}" + loop: "{{ cluster_hosts_target }}" async: 7200 poll: 0 - register: aws_instances + register: r__ec2 - name: create/aws | Wait for aws instance creation to complete - async_status: - jid: "{{ item.ansible_job_id }}" - register: aws_jobs - until: aws_jobs.finished + async_status: { jid: "{{ item.ansible_job_id }}" } + register: r__async_status__ec2 + until: r__async_status__ec2.finished delay: 3 retries: 300 - with_items: "{{aws_instances.results}}" + with_items: "{{r__ec2.results}}" -# - name: create/aws | aws_jobs.results -# debug: msg={{aws_jobs.results}} +# - name: create/aws | r__async_status__ec2.results +# debug: msg={{r__async_status__ec2.results}} - name: create/aws | Set a fact containing the newly-created hosts set_fact: - cluster_hosts_created: "{{ aws_jobs.results | json_query(\"[?changed==`true`].item.item\") }}" + cluster_hosts_created: "{{ r__async_status__ec2.results | json_query(\"[?changed==`true`].item.item\") }}" - - name: create/aws | Force set maintenance_mode to true (when prometheus_set_unset_maintenance_mode) - ec2_tag: + - name: create/aws | Attach (or create) volumes where 'src' is present (e.g. 
inserted as part of _scheme_rmvm_keepdisk_rollback scheme) + ec2_vol: aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" region: "{{cluster_vars.region}}" - resource: "{{ item }}" - tags: - maintenance_mode: "true" - with_items: "{{ aws_jobs.results | json_query('[].tagged_instances[0].id')}}" - when: (prometheus_set_unset_maintenance_mode is defined and prometheus_set_unset_maintenance_mode|bool) + instance: "{{ r__async_status__ec2.results | json_query(\"[].tagged_instances[?tags.Name==`\" + item.hostname + \"`].id[] | [0]\") | default(omit) }}" + id: "{{item.auto_volume.src.volume_id | default(omit)}}" + snapshot: "{{item.auto_volume.snapshot | default(omit)}}" + device_name: "{{item.auto_volume.device_name}}" + encrypted: "{{item.auto_volume.encrypted}}" + volume_size: "{%- if 'src' not in item.auto_volume -%}{{item.auto_volume.volume_size}}{%- endif -%}" + volume_type: "{{item.auto_volume.volume_type}}" + delete_on_termination: yes + loop: "{{ cluster_hosts_target_denormalised_by_volume| selectattr('src', 'defined') | list }}" + async: 7200 + poll: 0 + register: r__ec2_vol - - name: create/aws | Extract EBS volume data so we can tag the disks - set_fact: - ebsdata: | - {% set res = [] -%} - {%- for host in aws_jobs.results -%} - {%- for devkey in host.tagged_instances[0].block_device_mapping.keys()-%} - {% set _dummy = res.extend([{ - 'hostname': host.tagged_instances[0].tags.Name, - 'ec2_id': host.tagged_instances[0].id, - 'device_name': devkey, - 'volume_id': host.tagged_instances[0].block_device_mapping[devkey].volume_id - }]) -%} - {%- endfor %} - {%- endfor %} - {{ res }} + - name: create/aws | Wait for volume creation/ attachment to complete + async_status: { jid: "{{ item.ansible_job_id }}" } + register: r__async_status__ec2_vol + until: r__async_status__ec2_vol.finished + delay: 3 + retries: 300 + with_items: "{{r__ec2_vol.results}}" + +# - name: create/aws | r__async_status__ec2_vol +# debug: msg={{r__async_status__ec2_vol}} -# - name: create/aws | ebsdata -# debug: msg={{ebsdata}} + +- name: create/aws | Tag the EBS volumes + block: + - name: create/aws | Get the ec2_instance_info for EBS tagging + ec2_instance_info: + filters: + "instance-state-name": ["running", "stopped"] + "tag:cluster_name": "{{cluster_name}}" + "tag:lifecycle_state": "current" + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{cluster_vars.region}}" + register: r__ec2_instance_info - name: create/aws | Set the ec2 volume name tag ec2_tag: @@ -108,6 +132,21 @@ aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" region: "{{cluster_vars.region}}" resource: "{{item.volume_id}}" - tags: + tags: "{{ _tags | combine(cluster_vars.custom_tagslabels | default({})) }}" + with_items: "{{_ec2_vols_denormalised_by_device}}" + vars: + _ec2_vols_denormalised_by_device: | + {% set res = [] -%} + {%- for host_instance in r__ec2_instance_info.instances -%} + {%- for block_device in host_instance.block_device_mappings -%} + {% set _ = res.append({'hostname': host_instance.tags.Name, 'hosttype': host_instance.tags.hosttype, 'device_name': block_device.device_name, 'volume_id': block_device.ebs.volume_id}) -%} + {%- endfor %} + {%- endfor %} + {{ res }} + _tags: Name: "{{ item.hostname }}--{{item.device_name | regex_replace('^.*\\/(.*)', '\\1')}}" - with_items: "{{ebsdata}}" + device_name: "{{item.device_name}}" + inv_node_version: 
"{{cluster_vars[buildenv].hosttype_vars[item.hosttype].version | default(omit)}}" + inv_node_type: "{{item.hosttype}}" + owner: "{{ lookup('env','USER') | lower }}" + release: "{{ release_version }}" diff --git a/create/tasks/main.yml b/create/tasks/main.yml index d6e95706..dfbb3db4 100644 --- a/create/tasks/main.yml +++ b/create/tasks/main.yml @@ -23,3 +23,21 @@ - name: "Create {{cluster_vars.type}} cluster" include_tasks: "{{cluster_vars.type}}.yml" + vars: + # auto_volumes are normally a list of volumes per host. We cannot iterate this within a non-nested ansible loop(with_items), so we denormalise/ flatten it into a new one-dimensional list, of each volume, as well as all the parent host information. + cluster_hosts_target_denormalised_by_volume: | + {% set res = [] -%} + {%- for cht_host in cluster_hosts_target -%} + {%- for autovol in cht_host.auto_volumes -%} + {%- set elem = {} -%} + {%- for cht_host_key in cht_host.keys() -%} + {%- if cht_host_key != 'auto_volumes' -%} + {%- set _ = elem.update({cht_host_key: cht_host[cht_host_key]}) -%} + {%- else -%} + {%- set _ = elem.update({'auto_volume': autovol}) -%} + {%- endif -%} + {%- endfor -%} + {%- set _ = res.append(elem) -%} + {%- endfor -%} + {%- endfor -%} + {{res}} diff --git a/redeploy/__common/tasks/poweron_vms.yml b/redeploy/__common/tasks/poweron_vms.yml index 551c4fad..9a8c5d6a 100644 --- a/redeploy/__common/tasks/poweron_vms.yml +++ b/redeploy/__common/tasks/poweron_vms.yml @@ -18,7 +18,6 @@ run_once: true when: cluster_vars.type == "aws" - - name: poweron_vms | Power-on GCP GCE VM(s) asynchronously block: - name: poweron_vms | Power-on GCP GCE VM(s) diff --git a/redeploy/__common/tasks/set_lifecycle_state_label.yml b/redeploy/__common/tasks/set_lifecycle_state_label.yml index 045171e2..a2d92ea3 100644 --- a/redeploy/__common/tasks/set_lifecycle_state_label.yml +++ b/redeploy/__common/tasks/set_lifecycle_state_label.yml @@ -1,4 +1,5 @@ --- + - name: set_lifecycle_state_label | hosts_to_relabel debug: msg="{{hosts_to_relabel}}" diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml new file mode 100644 index 00000000..07db0737 --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml @@ -0,0 +1,33 @@ +--- + +- name: _get_diskinfo_aws | ec2_instance_info + ec2_instance_info: + filters: + "instance-state-name": ["running", "stopped"] + "tag:cluster_name": "{{cluster_name}}" + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{cluster_vars.region}}" + register: r__ec2_instance_info + +- name: _get_diskinfo_aws | r__ec2_instance_info + debug: msg={{r__ec2_instance_info}} + +- name: _get_diskinfo_aws | augment cluster_hosts_target auto_volumes with source disk info + set_fact: + cluster_hosts_target: | + {%- for cht_host in cluster_hosts_target -%} + {%- for cht_autovol in cht_host.auto_volumes -%} + {%- for chs_host_info_result in r__ec2_instance_info.instances | selectattr('tags.lifecycle_state', '!=', 'current') -%} + {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == chs_host_info_result.tags.Name | regex_replace('-(?!.*-).*') -%} + {%- for chs_host_diskinfo in chs_host_info_result.block_device_mappings | selectattr('device_name', '==', cht_autovol.device_name) -%} + {%- set _ = cht_autovol.update({'src': {'instance_id': 
chs_host_info_result.instance_id, 'device_name': chs_host_diskinfo.device_name, 'volume_id': chs_host_diskinfo.ebs.volume_id }}) -%} + {%- endfor -%} + {%- endif -%} + {%- endfor -%} + {%- endfor -%} + {%- endfor -%} + {{cluster_hosts_target}} + +- name: _get_diskinfo_aws | cluster_hosts_target + debug: msg={{cluster_hosts_target}} diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype.yml new file mode 100644 index 00000000..6cc2b25d --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype.yml @@ -0,0 +1,23 @@ +--- + +- name: set hosts_to_redeploy if canary==start + set_fact: hosts_to_redeploy={{(cluster_hosts_target_by_hosttype[hosttype] | sort(attribute='hostname'))[:1]}} + when: (canary is defined and canary=="start") + +- name: set hosts_to_redeploy if canary==finish + set_fact: hosts_to_redeploy={{(cluster_hosts_target_by_hosttype[hosttype] | sort(attribute='hostname'))[1:]}} + when: (canary is defined and canary=="finish") + +- name: set hosts_to_redeploy if canary==none + set_fact: hosts_to_redeploy={{(cluster_hosts_target_by_hosttype[hosttype] | sort(attribute='hostname'))}} + when: (canary is defined and canary=="none") + +- debug: msg="Canary redeploy ({{canary}}) selected; deleting and redeploying [{{hosts_to_redeploy | json_query('[].hostname') | join(', ')}}]" + when: (canary is defined) + + +- name: Run redeploy per host. Delete one at a time, then reprovision. + include_tasks: by_hosttype_by_host.yml + with_items: "{{ hosts_to_redeploy }}" + loop_control: + loop_var: host_to_redeploy diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml new file mode 100644 index 00000000..a1f88ea4 --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml @@ -0,0 +1,50 @@ +--- + +- debug: msg="by_hosttype_by_host | Attempting to redeploy {{host_to_redeploy.hostname}}" + +- name: stop/ remove previous instance + block: + - name: by_hosttype_by_host | run predeleterole role + include_role: + name: "{{predeleterole}}" + vars: + hosts_to_remove: "{{ hosts_to_stop }}" + when: predeleterole is defined and predeleterole != "" + + - name: by_hosttype_by_host | Power off old VM + include_role: + name: clusterverse/redeploy/__common + tasks_from: poweroff_vms.yml + vars: + _host_to_redeploy_nosuffix: "{{host_to_redeploy.hostname | regex_replace('-(?!.*-).*')}}" #Remove the cluster_suffix from the hostname + hosts_to_stop: "{{ cluster_hosts_state | to_json | from_json | json_query(\"[?tagslabels.lifecycle_state!='current' && starts_with(name, '\" + _host_to_redeploy_nosuffix + \"')]\") }}" + +- name: "by_hosttype_by_host | Run {{mainclusteryml}} to add {{host_to_redeploy.hostname}} to cluster" + shell: "{{ (argv | join(' ')) | regex_replace('redeploy.yml', mainclusteryml) }} -e cluster_suffix={{cluster_suffix}} -e '{'cluster_hosts_target': [{{host_to_redeploy | to_json}}]}'" + register: r__mainclusteryml + no_log: True + ignore_errors: yes +- debug: msg="{{[r__mainclusteryml.stdout_lines] + [r__mainclusteryml.stderr_lines]}}" + failed_when: r__mainclusteryml is failed + when: r__mainclusteryml is failed or (debug_nested_log_output is defined and debug_nested_log_output|bool) + +- name: by_hosttype_by_host | re-acquire cluster_hosts_state (NOT cluster_hosts_target, as we are augmenting this in _add_src_diskinfo_to_cluster_hosts_target__ (also, it is not affected by change 
of state)) + import_role: + name: clusterverse/cluster_hosts + tasks_from: get_cluster_hosts_state.yml + +- name: by_hosttype_by_host | Power on new VM (not needed for normal redeploy, but for rescue case) + include_role: + name: clusterverse/redeploy/__common + tasks_from: poweron_vms.yml + vars: + hosts_to_start: "{{ cluster_hosts_state | selectattr('name', '==', host_to_redeploy.hostname) | list }}" + +- name: by_hosttype_by_host | re-acquire the dynamic inventory + include_role: + name: clusterverse/dynamic_inventory + +- name: by_hosttype_by_host | re-acquire cluster_hosts_state (NOT cluster_hosts_target, as we are augmenting this in _add_src_diskinfo_to_cluster_hosts_target__ (also, it is not affected by change of state)) + import_role: + name: clusterverse/cluster_hosts + tasks_from: get_cluster_hosts_state.yml diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml new file mode 100644 index 00000000..65fb69d7 --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml @@ -0,0 +1,115 @@ +--- + +- name: Include preflight checks/ assertions. + include_tasks: preflight.yml + +- name: Redeploy by hosttype; rollback on fail + block: + - name: Redeploy setup + block: + - name: Change lifecycle_state label from 'current' to 'retiring' + include_role: + name: clusterverse/redeploy/__common + tasks_from: set_lifecycle_state_label.yml + vars: + hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current']\") }}" + new_state: "retiring" + when: ('retiring' not in (cluster_hosts_state | map(attribute='tagslabels.lifecycle_state'))) + + - name: re-acquire cluster_hosts_target and cluster_hosts_state + include_role: + name: clusterverse/cluster_hosts + public: yes + + - assert: { that: "cluster_hosts_state | json_query(\"[?tagslabels.cluster_suffix == '\"+ cluster_suffix +\"']\") | length == 0", msg: "Please ensure cluster_suffix ({{cluster_suffix}}) is not already set on the cluster" } + when: cluster_suffix is defined + when: (canary=="start" or canary=="none") + + - name: Add the disk info from previous instances to cluster_hosts_target + include_tasks: "_add_src_diskinfo_to_cluster_hosts_target__{{cluster_vars.type}}.yml" + + - name: Run redeploy per hosttype. Create one at a time, then stop previous. + include_tasks: by_hosttype.yml + with_items: "{{ myhosttypes_array }}" + loop_control: + loop_var: hosttype + vars: + cluster_hosts_target_by_hosttype: "{{cluster_hosts_target | dict_agg('hosttype')}}" + myhosttypes_array: "{%- if myhosttypes is defined -%} {{ myhosttypes.split(',') }} {%- else -%} {{ cluster_hosts_target_by_hosttype.keys() | list }} {%- endif -%}" + + - fail: + when: testfail is defined and testfail == "fail_1" + + - name: re-acquire cluster_hosts_target and cluster_hosts_state (For the '-e canary=tidy' option. 
This can't be run in the tidy block below because that block depends on this info being correct) + import_role: + name: clusterverse/cluster_hosts + when: (canary_tidy_on_success is defined and canary_tidy_on_success|bool) + + rescue: + - debug: msg="Rescuing" + + - name: rescue | re-acquire cluster_hosts_target and cluster_hosts_state + import_role: + name: clusterverse/cluster_hosts + + - name: rescue | Change lifecycle_state label from 'current' to 'redeployfail' + include_role: + name: clusterverse/redeploy/__common + tasks_from: set_lifecycle_state_label.yml + vars: + hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current']\") }}" + new_state: "redeployfail" + + - name: rescue | Change lifecycle_state label from 'retiring' to 'current' + include_role: + name: clusterverse/redeploy/__common + tasks_from: set_lifecycle_state_label.yml + vars: + hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}" + new_state: "current" + + - name: rescue | re-acquire cluster_hosts_target and cluster_hosts_state + import_role: + name: clusterverse/cluster_hosts + + - name: rescue | Add the disk info from previous instances to cluster_hosts_target + include_tasks: "_add_src_diskinfo_to_cluster_hosts_target__{{cluster_vars.type}}.yml" + + - name: rescue | explicitly specify only the relevant cluster.yml roles to run for rescuing + set_fact: + argv: "{{argv + ['--tags'] + ['clusterverse_create,clusterverse_dynamic_inventory,clusterverse_readiness'] }}" + + - name: rescue | Run redeploy per hosttype. Create one at a time, then stop previous. + include_tasks: by_hosttype.yml + with_items: "{{ myhosttypes_array }}" + loop_control: + loop_var: hosttype + vars: + cluster_hosts_target_by_hosttype: "{{cluster_hosts_target | dict_agg('hosttype')}}" + myhosttypes_array: "{%- if myhosttypes is defined -%} {{ myhosttypes.split(',') }} {%- else -%} {{ cluster_hosts_target_by_hosttype.keys() | list }} {%- endif -%}" + + - name: rescue | end_play to prevent tidying of pre-rescued VMs + meta: end_play + when: canary!="tidy" + + +- name: "Tidy up powered-down, non-current instances. NOTE: Must do clean_dns first, because both clean_dns and clean_vms have the cluster_hosts role as a dependency, which when run after clean_vms, will be empty." + block: + - assert: { that: "'current' in (cluster_hosts_state | map(attribute='tagslabels.lifecycle_state'))", msg: "ERROR - Cannot tidy when there are no machines in the 'current' lifecycle_state. Please use '-e clean=_all_'." } + + - include_role: + name: clusterverse/clean + tasks_from: clean_dns.yml + when: (hosts_to_clean | length) and (cluster_vars.dns_server is defined and cluster_vars.dns_server != "") and (cluster_vars.dns_user_domain is defined and cluster_vars.dns_user_domain != "") + + - include_role: + name: clusterverse/clean + tasks_from: clean_vms.yml + when: (hosts_to_clean | length) + + - debug: + msg: "tidy | No hosts to tidy. Only powered-down, non-current machines with be tidied; to clean other machines, please use the '-e clean=' extra variable." 
+ when: hosts_to_clean | length == 0 + vars: + hosts_to_clean: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current' && !(contains('RUNNING,running', instance_state))]\") }}" + when: canary=="tidy" or ((canary=="none" or canary=="finish") and canary_tidy_on_success is defined and canary_tidy_on_success|bool) diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml new file mode 100644 index 00000000..dbc99a0a --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml @@ -0,0 +1,39 @@ +--- + +- name: Preflight check + block: + - block: + - assert: { that: "cluster_vars.type != 'gcp'", fail_msg: "This scheme is not supported on GCP." } + + - name: Preflight check | get ec2_instance_info for current disk information + ec2_instance_info: + filters: { "instance-state-name": [ "running", "stopped" ], "tag:cluster_name": "{{cluster_name}}", "tag:lifecycle_state": "current" } + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{cluster_vars.region}}" + register: r__ec2_instance_info + + - assert: { that: "_invalid_disks | length == 0", fail_msg: "Disks cannot be attached to /dev/sd[b-e] after the instance has been created (these are supposed to be ephemeral mounts only, so can only exist if created with the VM). [Found on: {{ _invalid_disks | join(',')}}]. If you have EBS disks, you'll need to move them to another mount point (a redeploy scheme that replaces the disks will do this" } + vars: { _invalid_disks: "{{ r__ec2_instance_info.instances | json_query(\"[?block_device_mappings[?contains(`/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde`, device_name)]].tags.Name\") }}" } + + - assert: { that: "ec2_disks_is_subset_of_target_disks|bool==false", fail_msg: "Existing EBS disks must be the same as (or a subset of) the auto_volumes defined in your cluster_vars definition.", success_msg: "Success - Existing EBS disks are the same as (or a subset of) the auto_volumes defined in your cluster_vars definition" } + vars: + ec2_disks_is_subset_of_target_disks: |- + {%- set testloop = namespace(is_not_subset=false) -%} + {%- for cht_host in cluster_hosts_target | json_query('[].{hostname: hostname, discs: auto_volumes[].device_name}') -%} + {%- for ec2_host in r__ec2_instance_info.instances | json_query('[?tags.lifecycle_state != "current"].{hostname: tags.Name, discs: block_device_mappings[].device_name | [1:]}') -%} + {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == ec2_host.hostname | regex_replace('-(?!.*-).*') -%} + {%- if not ec2_host.discs is subset(cht_host.discs) -%} + {%- set testloop.is_not_subset = true -%} + {%- endif -%} + {%- endif -%} + {%- endfor -%} + {%- endfor -%} + {{ testloop.is_not_subset }} + when: cluster_vars.type == "aws" + + - assert: { that: "non_current_hosts | length == 0", fail_msg: "ERROR - All VMs must be in the 'current' lifecycle_state. 
Those not [{{non_current_hosts | join(',')}}]" } + vars: { non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" } + when: (canary=="start" or canary=="none") + + - assert: { that: "(cluster_hosts_state | selectattr('tagslabels.lifecycle_state', '==', 'current') | list | length) == (cluster_hosts_target | length)", fail_msg: "Cannot use this scheme to redeploy to a different-sized cluster" } diff --git a/redeploy/tasks/main.yml b/redeploy/tasks/main.yml index bb4f62ee..e4788e94 100644 --- a/redeploy/tasks/main.yml +++ b/redeploy/tasks/main.yml @@ -4,7 +4,7 @@ block: - assert: { that: "clean is not defined", msg: "Must not set the 'clean' variable for a redeploy" } - assert: { that: "canary is defined and (canary is defined and canary in ['start', 'finish', 'none', 'tidy', 'revert'])", msg: "Canary must be 'start', 'finish', 'none', 'tidy' or 'revert'" } - - assert: { that: "redeploy_scheme is defined" } + - assert: { that: "redeploy_scheme is defined and redeploy_scheme in redeploy_schemes_supported" } - assert: { that: "cluster_hosts_state | length", msg: "Redeploy only possible with an existing cluster." } - name: "Run the {{redeploy_scheme}} redploy scheme" From 442f99f96c51228be2852c1a44c4357241e24924 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Fri, 2 Oct 2020 10:35:28 +0100 Subject: [PATCH 11/58] Prototype ansible_vault.py plugin --- _dependencies/action_plugins/ansible_vault.py | 49 +++++++++++++++++++ config/tasks/metricbeat.yml | 0 2 files changed, 49 insertions(+) create mode 100644 _dependencies/action_plugins/ansible_vault.py mode change 100755 => 100644 config/tasks/metricbeat.yml diff --git a/_dependencies/action_plugins/ansible_vault.py b/_dependencies/action_plugins/ansible_vault.py new file mode 100644 index 00000000..54d61077 --- /dev/null +++ b/_dependencies/action_plugins/ansible_vault.py @@ -0,0 +1,49 @@ +from __future__ import (absolute_import, division, print_function) + +__metaclass__ = type + +from ansible.plugins.action import ActionBase +from ansible.parsing.vault import VaultLib, VaultSecret +import re + + +class ActionModule(ActionBase): + TRANSFERS_FILES = False + + def run(self, tmp=None, task_vars=None): + if task_vars is None: + task_vars = dict() + + if 'vaultid' not in self._task.args or 'vaultpass' not in self._task.args or 'action' not in self._task.args: + return {"failed": True, "msg": "'vaultid' and 'vaultpass' and 'action' are required options"} + + result = super(ActionModule, self).run(tmp, task_vars) + del tmp # tmp is deprecated + + if self._task.args["action"] == "encrypt": + if "plaintext" not in self._task.args: + return {"failed": True, "msg": "'plaintext' is required for encrypt"} + + # encrypt: + oVaultSecret = VaultSecret(self._task.args["vaultpass"].encode('utf-8')) + oVaultLib = VaultLib([(self._task.args["vaultid"], oVaultSecret)]) + vault_tag = oVaultLib.encrypt(self._task.args["plaintext"], oVaultSecret, self._task.args["vaultid"]) + + # reformat output + g_tag_value = re.match(r"^(?P
\$ANSIBLE_VAULT;(?P[\d\.]+?);(?P\w+?)(?:;(?P.*?))?)[\r\n](?P.*)$", vault_tag, flags=re.DOTALL) + res_cipherstr = re.sub(r'[ \n\r]', "", g_tag_value.group('vaulttext_raw'), flags=re.DOTALL) + res_vaulttext = g_tag_value.group('header') + "\n" + res_cipherstr + + result['msg'] = {"res_vaulttext": res_vaulttext, "plaintext": self._task.args["plaintext"]} + + else: + if "vaulttext" not in self._task.args: + return {"failed": True, "msg": "'vaulttext' is required for decrypt"} + + oVaultLib = VaultLib([(self._task.args["vaultid"], VaultSecret(self._task.args["vaultpass"].encode('utf-8')))]) + plaintext = oVaultLib.decrypt(self._task.args["vaulttext"]) + result['msg'] = {"res_vaulttext": self._task.args["vaulttext"], "plaintext": plaintext} + + result['failed'] = False + + return result diff --git a/config/tasks/metricbeat.yml b/config/tasks/metricbeat.yml old mode 100755 new mode 100644 From c9ed3e440036c34c9abef1b03006f6b7858b1abc Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Mon, 5 Oct 2020 09:43:33 +0100 Subject: [PATCH 12/58] Fixes for _scheme_rmvm_keepdisk_rollback --- EXAMPLE/group_vars/_skel/cluster_vars.yml | 15 +- .../group_vars/test_aws_euw1/cluster_vars.yml | 14 +- .../group_vars/test_gcp_euw1/cluster_vars.yml | 2 +- README.md | 22 +- _dependencies/library/ebsmap.py | 270 ++++++++++++++++++ _dependencies/library/ebsmap__LICENSE | 29 ++ _dependencies/library/ebsmap__README.md | 24 ++ _dependencies/tasks/main.yml | 2 +- config/tasks/disks_auto_aws.yml | 109 +++++++ config/tasks/disks_auto_aws_nvme.yml | 112 -------- ...{disks_auto.yml => disks_auto_generic.yml} | 19 +- config/tasks/main.yml | 10 +- create/tasks/aws.yml | 12 +- .../tasks/main.yml | 2 +- .../tasks/main.yml | 2 +- ..._diskinfo_to_cluster_hosts_target__aws.yml | 6 +- .../tasks/preflight.yml | 8 +- 17 files changed, 498 insertions(+), 160 deletions(-) create mode 100644 _dependencies/library/ebsmap.py create mode 100644 _dependencies/library/ebsmap__LICENSE create mode 100644 _dependencies/library/ebsmap__README.md create mode 100644 config/tasks/disks_auto_aws.yml delete mode 100644 config/tasks/disks_auto_aws_nvme.yml rename config/tasks/{disks_auto.yml => disks_auto_generic.yml} (79%) diff --git a/EXAMPLE/group_vars/_skel/cluster_vars.yml b/EXAMPLE/group_vars/_skel/cluster_vars.yml index aab028aa..4f004bd4 100644 --- a/EXAMPLE/group_vars/_skel/cluster_vars.yml +++ b/EXAMPLE/group_vars/_skel/cluster_vars.yml @@ -72,13 +72,14 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within # sandbox: # hosttype_vars: # sys: {vms_by_az: {a: 1, b: 1, c: 1}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} -# # sysnobeats: {vms_by_az: {a: 1, b: 0, c: 0}, skip_beat_install:true, flavor: t3a.nano, version: "{{sysnobeats_version | default('')}}", auto_volumes: [] -# # sysdisks: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/var/log/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 3, ephemeral: False, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/var/log/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} -# # 
sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true }]} -# # hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, auto_volumes: [], nvme: {volumes: [{mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}, {mountpoint: "/var/log/mysvc2", fstype: ext4, volume_size: 2500}]} } -# # hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, auto_volumes: [], nvme: {volumes: [{mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}, {mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } } -# # hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} -# # hosthdd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: h1.2xlarge, auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} +# #sysdisks2: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} +# #sysdisks3: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} +# #sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} +# #hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}] } } +# #hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: 
"/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# #hosthdd_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } +# #hosthdd_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# #hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, ephemeral: 0, encrypted: True, "delete_on_termination": true}]} # aws_access_key: "" # aws_secret_key: "" # vpc_name: "test{{buildenv}}" diff --git a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml index d3141a27..810f4be2 100644 --- a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml @@ -79,12 +79,14 @@ cluster_vars: sandbox: hosttype_vars: sys: {vms_by_az: {a: 1, b: 1, c: 1}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} -# sysdisks: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/var/log/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 3, ephemeral: False, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/var/log/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} -# sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true }]} -# hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, auto_volumes: [], nvme: {volumes: [{mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}, {mountpoint: "/var/log/mysvc2", fstype: ext4, volume_size: 2500}]} } -# hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, auto_volumes: [], nvme: {volumes: [{mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}, {mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}], 
lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } } -# hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} -# hosthdd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: h1.2xlarge, auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} +# sysdisks2: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} +# sysdisks3: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} +# sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} +# hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}] } } +# hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# hosthdd_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } +# hosthdd_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: 
"/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, ephemeral: 0, encrypted: True, "delete_on_termination": true}]} aws_access_key: "" aws_secret_key: "" vpc_name: "test{{buildenv}}" diff --git a/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml index c9ec7076..e4390373 100644 --- a/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml @@ -6,7 +6,7 @@ gcp_credentials_json: "{{ lookup('file', gcp_credentials_file) | default({'proje redeploy_schemes_supported: ['_scheme_addallnew_rmdisk_rollback', '_scheme_addnewvm_rmdisk_rollback', '_scheme_rmvm_rmdisk_only'] -redeploy_scheme: _scheme_addallnew_rmdisk_rollback +#redeploy_scheme: _scheme_addallnew_rmdisk_rollback #redeploy_scheme: _scheme_addnewvm_rmdisk_rollback #redeploy_scheme: _scheme_rmvm_rmdisk_only #redeploy_scheme: _scheme_rmvm_keepdisk_rollback diff --git a/README.md b/README.md index 33b6c03e..d963d3d9 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ A full-lifecycle, immutable cloud infrastructure cluster management **role**, us + **Scale (e.g. add a node):** If you change the config yaml and rerun the deploy, new nodes will be added. + **Redeploy (e.g. up-version):** If you need to up-version, the `redeploy.yml` playbook will replace each node in turn, (with optional callbacks), and rollback if any failures occur. -**clusterverse** is designed to deploy base-vm infrastructure that underpins cluster-based infrastructure, for example, Couchbase, or Cassandra. +**clusterverse** is designed to manage base-vm infrastructure that underpins cluster-based infrastructure, for example, Couchbase, Kafka, Elasticsearch, or Cassandra. ## Contributing Contributions are welcome and encouraged. Please see [CONTRIBUTING.md](https://github.com/sky-uk/clusterverse/blob/master/CONTRIBUTING.md) for details. @@ -35,7 +35,8 @@ To active the pipenv: ### DNS DNS is optional. If unset, no DNS names will be created. If required, you will need a DNS zone delegated to one of the following: + Bind9 -+ Route53 ++ AWS Route53 ++ Google Cloud DNS Credentials to the DNS server will also be required. These are specified in the `cluster_vars.yml` file described below. @@ -70,13 +71,15 @@ Credentials can be encrypted inline in the playbooks using [ansible-vault](https ## Usage **clusterverse** is an Ansible _role_, and as such must be imported into your \/roles directory. There is a full-featured example in the [/EXAMPLE](https://github.com/sky-uk/clusterverse/tree/master/EXAMPLE) subdirectory. 
-To import the role into your project, create a `requirements.yml` file containing: +To import the role into your project, create a [`requirements.yml`](https://github.com/sky-uk/clusterverse/blob/master/EXAMPLE/requirements.yml) file containing: ``` - src: https://github.com/sky-uk/clusterverse - version: master ## or hash, or version + version: master ## branch, hash, or tag name: clusterverse ``` -To install the role into a project's `roles` directory: +If you use a `cluster.yml` file similar to the example found in [EXAMPLE/cluster.yml](https://github.com/sky-uk/clusterverse/blob/master/EXAMPLE/cluster.yml), clusterverse will be installed automatically on each run of the playbook. + +To install it manually: + `ansible-galaxy install -r requirements.yml -p //roles/` @@ -110,7 +113,7 @@ The role is designed to run in two modes: + **It assumes a resilient deployment (it can tolerate one node being deleted from the cluster). There is no rollback in case of failure** + For each node in the cluster: + Run `predeleterole` - + Delete the node + + Delete/ terminate the node (note, this is _irreversible_). + Run the main cluster.yml (with the same parameters as for the main playbook), which forces the missing node to be redeployed (the `cluster_suffix` remains the same). + If the process fails at any point: + No further VMs will be deleted or rebuilt - the playbook stops. @@ -130,8 +133,9 @@ The role is designed to run in two modes: + The old VMs are stopped. + If the process fails for any reason, the old VMs are reinstated, and the new VMs stopped (rollback) + To delete the old VMs, either set '-e canary_tidy_on_success=true', or call redeploy.yml with '-e canary=tidy' - + **_scheme_rmvm_keepdisk_rollback** - + _Cluster topology must remain identical_ + + **_scheme_rmvm_keepdisk_rollback (AWS only so far)** + + Redeploys the nodes one by one, and moves the secondary (non-root) disks from the old to the new (note, only non-ephemeral disks can be moved). + + _Cluster topology must remain identical. More disks may be added, but none may change or be removed._ + **It assumes a resilient deployment (it can tolerate one node being removed from the cluster).** + For each node in the cluster: + Run `predeleterole` @@ -139,5 +143,5 @@ The role is designed to run in two modes: + Detach the disks from the old node + Run the main cluster.yml to create a new node + Attach disks to new node - + If the process fails for any reason, the old VMs are reinstated (and the disks reattached to the old nodes), and the new VMs stopped (rollback) + + If the process fails for any reason, the old VMs are reinstated (and the disks reattached to the old nodes), and the new VMs are stopped (rollback) + To delete the old VMs, either set '-e canary_tidy_on_success=true', or call redeploy.yml with '-e canary=tidy' diff --git a/_dependencies/library/ebsmap.py b/_dependencies/library/ebsmap.py new file mode 100644 index 00000000..dbd6d717 --- /dev/null +++ b/_dependencies/library/ebsmap.py @@ -0,0 +1,270 @@ +# Copyright 2020 Dougal Seeley +# https://github.com/dseeley/ebsmap + +# Copyright 2017 Amazon.com, Inc. and its affiliates. All Rights Reserved. +# Licensed under the MIT License. See the LICENSE accompanying this file +# for the specific language governing permissions and limitations under +# the License. 
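+# Overview: this module parses `lsblk -P` output for every disk/partition block
+# device, and for NVMe devices additionally issues an NVMe "identify controller"
+# admin ioctl to read the EBS volume-id (serial number) and the AWS-side device
+# name (e.g. /dev/sdf) from Amazon's vendor-specific data. The ioctl needs root,
+# hence the `become: yes` in the examples below.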
+ +from __future__ import (absolute_import, division, print_function) + +__metaclass__ = type + +DOCUMENTATION = ''' +--- +module: ebsmap +version_added: 1.0.0 +short_description: ebsmap +description: + - Map the EBS device name as defined in AWS (e.g. /dev/sdf) with the volume provided to the OS +author: + - Dougal Seeley + - Amazon.com inc. +''' + +EXAMPLES = ''' +- name: Get the nvme map information + ebsmap: + become: yes + register: r__ebsmap + +- name: ebsmap + debug: msg={{ebsmap}} +''' + +RETURN = ''' +"device_map": [ + { + "FSTYPE": "ext4", + "MOUNTPOINT": "/media/mysvc", + "NAME": "nvme1n1", + "PARTLABEL": "", + "SERIAL": "vol0c2c47ee4516063e9", + "TYPE": "disk", + "UUID": "c3630dbe-042e-44e5-ac67-54fa1c9e4cd2", + "device_name_aws": "/dev/sdf", + "device_name_os": "/dev/nvme1n1", + "volume_id": "vol-0c2c47ee4516063e9" + }, + { + "FSTYPE": "", + "MOUNTPOINT": "", + "NAME": "nvme0n1", + "PARTLABEL": "", + "SERIAL": "vol0b05e48d5677db81a", + "TYPE": "disk", + "UUID": "", + "device_name_aws": "/dev/sda1", + "device_name_os": "/dev/nvme0n1", + "volume_id": "vol-0b05e48d5677db81a" + }, + { + "FSTYPE": "ext4", + "MOUNTPOINT": "/", + "NAME": "nvme0n1p1", + "PARTLABEL": "", + "SERIAL": "", + "TYPE": "part", + "UUID": "96ec7adb-9d94-41c0-96a5-d6992c9d5f20", + "device_name_aws": "/dev/sda1", + "device_name_os": "/dev/nvme0n1p1", + "volume_id": "vol-0b05e48d5677db81a" + } + +"device_map": [ + { + "FSTYPE": "", + "MOUNTPOINT": "", + "NAME": "xvda", + "PARTLABEL": "", + "SERIAL": "", + "TYPE": "disk", + "UUID": "", + "device_name_aws": "/dev/sda", + "device_name_os": "/dev/xvda" + }, + { + "FSTYPE": "ext4", + "MOUNTPOINT": "/", + "NAME": "xvda1", + "PARTLABEL": "", + "SERIAL": "", + "TYPE": "part", + "UUID": "96ec7adb-9d94-41c0-96a5-d6992c9d5f20", + "device_name_aws": "/dev/sda1", + "device_name_os": "/dev/xvda1" + } +''' + +from ctypes import * +from fcntl import ioctl +import subprocess +import sys +import json +import re + +try: + from ansible.module_utils.basic import AnsibleModule + from ansible.errors import AnsibleError + from ansible.utils.display import Display +except: + pass + +NVME_ADMIN_IDENTIFY = 0x06 +NVME_IOCTL_ADMIN_CMD = 0xC0484E41 +AMZN_NVME_VID = 0x1D0F +AMZN_NVME_EBS_MN = "Amazon Elastic Block Store" + + +class nvme_admin_command(Structure): + _pack_ = 1 + _fields_ = [("opcode", c_uint8), # op code + ("flags", c_uint8), # fused operation + ("cid", c_uint16), # command id + ("nsid", c_uint32), # namespace id + ("reserved0", c_uint64), + ("mptr", c_uint64), # metadata pointer + ("addr", c_uint64), # data pointer + ("mlen", c_uint32), # metadata length + ("alen", c_uint32), # data length + ("cdw10", c_uint32), + ("cdw11", c_uint32), + ("cdw12", c_uint32), + ("cdw13", c_uint32), + ("cdw14", c_uint32), + ("cdw15", c_uint32), + ("reserved1", c_uint64)] + + +class nvme_identify_controller_amzn_vs(Structure): + _pack_ = 1 + _fields_ = [("bdev", c_char * 32), # block device name + ("reserved0", c_char * (1024 - 32))] + + +class nvme_identify_controller_psd(Structure): + _pack_ = 1 + _fields_ = [("mp", c_uint16), # maximum power + ("reserved0", c_uint16), + ("enlat", c_uint32), # entry latency + ("exlat", c_uint32), # exit latency + ("rrt", c_uint8), # relative read throughput + ("rrl", c_uint8), # relative read latency + ("rwt", c_uint8), # relative write throughput + ("rwl", c_uint8), # relative write latency + ("reserved1", c_char * 16)] + + +class nvme_identify_controller(Structure): + _pack_ = 1 + _fields_ = [("vid", c_uint16), # PCI Vendor ID + ("ssvid", c_uint16), # PCI 
Subsystem Vendor ID + ("sn", c_char * 20), # Serial Number + ("mn", c_char * 40), # Module Number + ("fr", c_char * 8), # Firmware Revision + ("rab", c_uint8), # Recommend Arbitration Burst + ("ieee", c_uint8 * 3), # IEEE OUI Identifier + ("mic", c_uint8), # Multi-Interface Capabilities + ("mdts", c_uint8), # Maximum Data Transfer Size + ("reserved0", c_uint8 * (256 - 78)), + ("oacs", c_uint16), # Optional Admin Command Support + ("acl", c_uint8), # Abort Command Limit + ("aerl", c_uint8), # Asynchronous Event Request Limit + ("frmw", c_uint8), # Firmware Updates + ("lpa", c_uint8), # Log Page Attributes + ("elpe", c_uint8), # Error Log Page Entries + ("npss", c_uint8), # Number of Power States Support + ("avscc", c_uint8), # Admin Vendor Specific Command Configuration + ("reserved1", c_uint8 * (512 - 265)), + ("sqes", c_uint8), # Submission Queue Entry Size + ("cqes", c_uint8), # Completion Queue Entry Size + ("reserved2", c_uint16), + ("nn", c_uint32), # Number of Namespaces + ("oncs", c_uint16), # Optional NVM Command Support + ("fuses", c_uint16), # Fused Operation Support + ("fna", c_uint8), # Format NVM Attributes + ("vwc", c_uint8), # Volatile Write Cache + ("awun", c_uint16), # Atomic Write Unit Normal + ("awupf", c_uint16), # Atomic Write Unit Power Fail + ("nvscc", c_uint8), # NVM Vendor Specific Command Configuration + ("reserved3", c_uint8 * (704 - 531)), + ("reserved4", c_uint8 * (2048 - 704)), + ("psd", nvme_identify_controller_psd * 32), # Power State Descriptor + ("vs", nvme_identify_controller_amzn_vs)] # Vendor Specific + + +class ebs_nvme_device: + def __init__(self, device): + self.device = device + self.ctrl_identify() + + def _nvme_ioctl(self, id_response, id_len): + admin_cmd = nvme_admin_command(opcode=NVME_ADMIN_IDENTIFY, addr=id_response, alen=id_len, cdw10=1) + with open(self.device, "rt") as nvme: + ioctl(nvme, NVME_IOCTL_ADMIN_CMD, admin_cmd) + + def ctrl_identify(self): + self.id_ctrl = nvme_identify_controller() + self._nvme_ioctl(addressof(self.id_ctrl), sizeof(self.id_ctrl)) + if self.id_ctrl.vid != AMZN_NVME_VID or self.id_ctrl.mn.decode().strip() != AMZN_NVME_EBS_MN: + raise TypeError("[ERROR] Not an EBS device: '{0}'".format(self.device)) + + def get_volume_id(self): + vol = self.id_ctrl.sn.decode() + if vol.startswith("vol") and vol[3] != "-": + vol = "vol-" + vol[3:] + return vol + + def get_block_device(self, stripped=False): + device = self.id_ctrl.vs.bdev.decode() + if stripped and device.startswith("/dev/"): + device = device[5:] + return device + + +def main(): + if not (len(sys.argv) > 1 and sys.argv[1] == "console"): + module = AnsibleModule(argument_spec={}, supports_check_mode=True) + else: + # For testing without Ansible (e.g on Windows) + class cDummyAnsibleModule(): + params = {} + + def exit_json(self, changed, **kwargs): + print(changed, json.dumps(kwargs, sort_keys=True, indent=4, separators=(',', ': '))) + + def warn(self, msg): + print("[WARNING]: " + msg) + + def fail_json(self, msg): + print("Failed: " + msg) + exit(1) + + module = cDummyAnsibleModule() + + # Get all existing block volumes by key=value, then parse this into a dictionary (which excludes non disk and partition block types, e.g. ram, loop). 
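+    # For illustration, each `lsblk -P` line is a set of KEY="value" pairs, e.g.:
+    #   NAME="nvme1n1" TYPE="disk" UUID="" FSTYPE="" PARTLABEL="" MOUNTPOINT="" SERIAL="vol0c2c47ee4516063e9"
+    # which the comprehension below turns into a dict like:
+    #   {'NAME': 'nvme1n1', 'TYPE': 'disk', 'UUID': '', 'FSTYPE': '', 'PARTLABEL': '', 'MOUNTPOINT': '', 'SERIAL': 'vol0c2c47ee4516063e9'}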
+ lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,PARTLABEL,MOUNTPOINT,SERIAL', '-P']).decode().rstrip().split('\n') + os_device_names = [dict((map(lambda x: x.strip("\""), sub.split("="))) for sub in dev.split('\" ') if '=' in sub) for dev in lsblk_devices] + os_device_names = [dev for dev in os_device_names if dev['TYPE'] in ['disk', 'part', 'lvm']] + + for os_device in os_device_names: + os_device_path = "/dev/" + os_device['NAME'] + if os_device['NAME'].startswith("nvme"): + try: + dev = ebs_nvme_device(os_device_path) + except FileNotFoundError as e: + module.fail_json(msg=os_device_path + ": FileNotFoundError" + str(e)) + except OSError as e: + module.warn(u"%s is not an nvme device." % os_device_path) + else: + os_device.update({"device_name_os": os_device_path, "device_name_aws": '/dev/' + dev.get_block_device(stripped=True).rstrip(), "volume_id": dev.get_volume_id()}) + elif os_device['NAME'].startswith("xvd"): + os_device.update({"device_name_os": os_device_path, "device_name_aws": '/dev/' + re.sub(r'xvd(.*)', r'sd\1', os_device['NAME'])}) + else: + os_device.update({"device_name_os": os_device_path, "device_name_aws": ""}) + + module.exit_json(changed=False, device_map=os_device_names) + + +if __name__ == '__main__': + main() diff --git a/_dependencies/library/ebsmap__LICENSE b/_dependencies/library/ebsmap__LICENSE new file mode 100644 index 00000000..3c642ec5 --- /dev/null +++ b/_dependencies/library/ebsmap__LICENSE @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2020, Dougal Seeley +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/_dependencies/library/ebsmap__README.md b/_dependencies/library/ebsmap__README.md new file mode 100644 index 00000000..f38b360c --- /dev/null +++ b/_dependencies/library/ebsmap__README.md @@ -0,0 +1,24 @@ +# ebsmap + +This is an Ansible module that is able to map AWS EBS device names (including NVME devices) to the host device names. + +## Credits +The bulk of the heavy lifting is nvme ioctl commands written by AWS for their Amazon Linux AMIs. 
See: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/nvme-ebs-volumes.html + +## Execution +This can be run as an Ansible module (needs root): +```yaml +- name: Get the nvme map information + ebsmap: + become: yes + register: r__ebsmap + +- name: ebsmap + debug: msg={{ebsmap}} + +``` + +or from the console: +```bash +python3 ./ebsmap.py console +``` \ No newline at end of file diff --git a/_dependencies/tasks/main.yml b/_dependencies/tasks/main.yml index ad6b4d29..2d300f62 100644 --- a/_dependencies/tasks/main.yml +++ b/_dependencies/tasks/main.yml @@ -36,5 +36,5 @@ - assert: { that: "(cluster_vars.assign_public_ip == 'yes' and cluster_vars.inventory_ip == 'public') or (cluster_vars.inventory_ip == 'private')", fail_msg: "If inventory_ip=='public', 'assign_public_ip' must be 'yes'" } when: cluster_vars.type == "gcp" or cluster_vars.type == "aws" - - assert: { that: "cluster_vars[buildenv] | json_query(\"hosttype_vars.*.auto_volumes[] | [?contains(`/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde`, device_name)]\") | length == 0", fail_msg: "/dev/sd[b-e] are not allowed as device_name in AWS cluster_vars[buildenv].hosttype_vars. Please start at /dev/sdf." } + - assert: { that: "cluster_vars[buildenv] | json_query(\"hosttype_vars.*.auto_volumes[] | [?contains(`/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde`, device_name) && volume_type!='ephemeral']\") | length == 0", fail_msg: "device_names /dev/sd[b-e] are only allowed for ephemeral volumes in AWS cluster_vars[buildenv].hosttype_vars. Please start non-ephemeral devices at /dev/sdf." } when: cluster_vars.type == "aws" diff --git a/config/tasks/disks_auto_aws.yml b/config/tasks/disks_auto_aws.yml new file mode 100644 index 00000000..bc2f8859 --- /dev/null +++ b/config/tasks/disks_auto_aws.yml @@ -0,0 +1,109 @@ +--- + +- name: disks_auto_aws | auto_volumes + debug: msg={{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].auto_volumes }} + +- name: disks_auto_aws | cluster_hosts_target(inventory_hostname) + debug: msg={{ (cluster_hosts_target | selectattr('hostname', '==', inventory_hostname) | list | first)['auto_volumes'] }} + + +- name: disks_auto_aws | Mount volumes as individual disks + block: + - name: disks_auto_aws | Get the nvme information (pre-filesystem create) + ebsmap: + become: yes + register: r__ebsmap + + - name: disks_auto_aws | r__ebsmap (pre-filesystem create) + debug: msg={{r__ebsmap}} + + - name: disks_auto_aws | Create filesystem (partitionless) + become: yes + filesystem: + fstype: "{{ item.fstype }}" + dev: "{{ (r__ebsmap.device_map | selectattr('device_name_aws', '==', item.device_name) | list | last)['device_name_os'] }}" + loop: "{{auto_vols}}" + + - name: disks_auto_aws | Get the nvme information (post-filesystem create), to get the block IDs for mounting + ebsmap: + become: yes + register: r__ebsmap + + - name: disks_auto_aws | r__ebsmap (post-filesystem create) + debug: msg={{r__ebsmap}} + + - name: disks_auto_aws | Mount created filesytem(s) persistently + become: yes + mount: + path: "{{ item.mountpoint }}" + src: "UUID={{ (r__ebsmap.device_map | selectattr('device_name_aws', '==', item.device_name) | list | last)['UUID'] }}" + fstype: "{{ item.fstype }}" + state: mounted + opts: _netdev + loop: "{{auto_vols}}" + + - name: disks_auto_aws | change ownership of mountpoint (if set) + become: yes + file: + path: "{{ item.mountpoint }}" + state: directory + mode: "{{ item.perms.mode | default(omit)}}" + owner: "{{ item.perms.owner | default(omit)}}" + group: "{{ item.perms.group | default(omit)}}" + loop: 
"{{auto_vols}}" + when: (auto_vols | map(attribute='mountpoint') | list | unique | count == auto_vols | map(attribute='mountpoint') | list | count) + vars: + auto_vols: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].auto_volumes }}" + + +# The following block mounts all nvme attached volumes that have a single, common mountpoint, by creating a logical volume +- name: disks_auto_aws | Mount nvme volumes in a single mountpoint through LV/VG + block: + - name: disks_auto_aws | Install logical volume management tooling. (yum - RedHat/CentOS) + become: true + yum: + name: "lvm*" + state: present + when: ansible_os_family == 'RedHat' + + - name: disks_auto_aws | Get the nvme information (pre-filesystem create) + ebsmap: + become: yes + register: r__ebsmap + + - name: disks_auto_aws | r__ebsmap (pre-filesystem create) + debug: msg={{r__ebsmap}} + + - name: disks_auto_aws | Create a volume group from all nvme devices + become: yes + lvg: + vg: "{{ hosttype_vars.lvmparams.vg_name }}" + pvs: "{{ r__ebsmap.device_map | json_query(\"[?device_name_aws && contains('\" + auto_vol_device_names + \"', device_name_aws)].device_name_os\") | join(',')}}" + vars: + auto_vol_device_names: "{{hosttype_vars.auto_volumes | map(attribute='device_name') | sort | join(',')}}" + + - name: disks_auto_aws | Create a logical volume from volume group + become: yes + lvol: + vg: "{{ hosttype_vars.lvmparams.vg_name }}" + lv: "{{ hosttype_vars.lvmparams.lv_name }}" + size: "{{ hosttype_vars.lvmparams.lv_size }}" + + - name: disks_auto_aws | Create filesystem(s) on attached nvme volume(s) + become: yes + filesystem: + fstype: "{{ hosttype_vars.auto_volumes | map(attribute='fstype') | list | unique | join('') }}" + dev: "/dev/{{ hosttype_vars.lvmparams.vg_name }}/{{ hosttype_vars.lvmparams.lv_name }}" + force: no + + - name: disks_auto_aws | Mount created filesytem(s) persistently + become: yes + mount: + path: "{{ hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | unique | join('') }}" + src: "/dev/{{ hosttype_vars.lvmparams.vg_name }}/{{ hosttype_vars.lvmparams.lv_name }}" + fstype: "{{ hosttype_vars.auto_volumes | map(attribute='fstype') | list | unique | join('') }}" + state: mounted + opts: _netdev + when: ('lvmparams' in hosttype_vars) and (hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | unique | count == 1) and (hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | count >= 2) and (hosttype_vars.auto_volumes | map(attribute='fstype') | list | unique | count == 1) + vars: + hosttype_vars: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype] }}" diff --git a/config/tasks/disks_auto_aws_nvme.yml b/config/tasks/disks_auto_aws_nvme.yml deleted file mode 100644 index c1623f2a..00000000 --- a/config/tasks/disks_auto_aws_nvme.yml +++ /dev/null @@ -1,112 +0,0 @@ ---- -#- debug: msg={{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme_volumes }} -#- debug: msg={{ ansible_facts.devices }} -- block: - - name: autodisks_nvme | Get unused block devices - set_fact: - block_devices: "{{ {'dev': item, 'size_b': (ansible_facts.devices[item].sectors|int) * (ansible_facts.devices[item].sectorsize|int)} }}" - with_items: "{{ ansible_facts.devices }}" - register: block_devices_list - when: item | regex_search("nvme") and ansible_facts.devices[item].partitions == {} - - - name: autodisks_nvme | Create unused block devices list - set_fact: - lsblk_volumes: "{{ block_devices_list.results | 
map(attribute='ansible_facts.block_devices') | select('defined') | list }}" - - - name: autodisks_nvme | lsblk_volumes - debug: msg={{ lsblk_volumes }} - - - name: autodisks_nvme | Create 'nvmevols' fact that contains a list of available host nvme devices (lsblk) mapped to the mountpoints defined in cluster_vars. Handles single mounting points with LV/VG - set_fact: - nvmevols: | - {% set res = [] -%} - {% set tmp_blkvols = lsblk_volumes -%} - {%- for nvmevol in cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.volumes -%} - {%- set blkvolloop = namespace(break=false) -%} - {%- for blkvol in tmp_blkvols if not blkvolloop.break -%} - {%- if (nvmevol.volume_size*1000000000|int) == (blkvol.size_b|int) -%} - {%- set _ = res.extend([ {'device': '/dev/'+blkvol.dev, 'mountpoint': nvmevol.mountpoint, 'fstype': nvmevol.fstype, 'perms': autovol.perms | default({})}]) -%} - {%- set _ = tmp_blkvols.remove(blkvol) -%} - {%- set blkvolloop.break = true -%} - {%- endif -%} - {%- endfor -%} - {%- endfor -%} - {{ res }} - - - name: autodisks_nvme | nvme mountpoints - debug: msg={{ nvmevols | map(attribute='mountpoint') | list | unique }} - - # The following block mounts all nvme attached volumes that have individual mountpoints - - name: autodisks_nvme | Mount nvme volumes with different mountpoints - block: - - name: autodisks_nvme | Create filesystem(s) on attached nvme volume(s) - become: yes - filesystem: - fstype: "{{ item.fstype }}" - dev: "{{ item.device }}" - force: no - with_items: "{{ nvmevols }}" - - - name: autodisks_nvme | Mount nvme created filesytem(s) persistently - become: yes - mount: - path: "{{ item.mountpoint }}" - src: "{{ item.device }}" - fstype: "{{ item.fstype }}" - state: mounted - opts: _netdev - with_items: "{{ nvmevols }}" - - - name: autodisks_nvme | change ownership of mountpoint (if set) - become: yes - file: - path: "{{ item.mountpoint }}" - state: directory - mode: "{{ item.perms.mode | default(omit)}}" - owner: "{{ item.perms.owner | default(omit)}}" - group: "{{ item.perms.group | default(omit)}}" - with_items: "{{ nvmevols }}" - when: (nvmevols | map(attribute='mountpoint') | list | unique | count == nvmevols | map(attribute='mountpoint') | list | count) - - # The following block mounts all nvme attached volumes that have a single, common mountpoint, by creating a logical volume - - name: autodisks_nvme | Mount nvme volumes in a single mountpoint through LV/VG - block: - #- debug: msg={{nvmevols | map(attribute='device') | join(',')}} - - - name: autodisks_nvme | Install logical volume management tooling. 
(yum - RedHat/CentOS) - become: true - yum: - name: "lvm*" - state: present - when: ansible_os_family == 'RedHat' - - - name: autodisks_nvme | Create a volume group from all nvme devices - become: yes - lvg: - vg: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.vg_name }}" - pvs: "{{nvmevols | map(attribute='device') | join(',')}}" - - - name: autodisks_nvme | Create a logical volume from volume group - become: yes - lvol: - vg: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.vg_name }}" - lv: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.lv_name }}" - size: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.lv_size }}" - - - name: autodisks_nvme | Create filesystem(s) on attached nvme volume(s) - become: yes - filesystem: - fstype: "{{ nvmevols | map(attribute='fstype') | list | unique | join('') }}" - dev: "/dev/{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.vg_name }}/{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.lv_name }}" - force: no - - - name: autodisks_nvme | Mount created filesytem(s) persistently - become: yes - mount: - path: "{{ nvmevols | map(attribute='mountpoint') | list | unique | join('') }}" - src: "/dev/{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.vg_name }}/{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.lv_name }}" - fstype: "{{ nvmevols | map(attribute='fstype') | list | unique | join('') }}" - state: mounted - opts: _netdev - when: (nvmevols | map(attribute='mountpoint') | list | unique | count == 1) and (nvmevols | map(attribute='mountpoint') | list | count >= 2) and (nvmevols | map(attribute='fstype') | list | unique | count == 1) - when: (cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.volumes is defined) and (cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.volumes|length > 0) and (cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.volumes != "[]") diff --git a/config/tasks/disks_auto.yml b/config/tasks/disks_auto_generic.yml similarity index 79% rename from config/tasks/disks_auto.yml rename to config/tasks/disks_auto_generic.yml index aefd1d5b..cde01eac 100644 --- a/config/tasks/disks_auto.yml +++ b/config/tasks/disks_auto_generic.yml @@ -1,22 +1,24 @@ --- + #- debug: msg={{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].auto_volumes }} #- debug: msg={{ ansible_facts.devices }} + - block: - - name: autodisks | Get unused block devices + - name: disks_auto_generic | Get unused block devices set_fact: block_devices: "{{ {'dev': item, 'size_b': (ansible_facts.devices[item].sectors|int) * (ansible_facts.devices[item].sectorsize|int)} }}" with_items: "{{ ansible_facts.devices }}" register: block_devices_list when: item | regex_search("nvme|[xvsh]+d") and ansible_facts.devices[item].partitions == {} - - name: autodisks | Create unused block devices list + - name: disks_auto_generic | Create unused block devices list set_fact: lsblk_volumes: "{{ block_devices_list.results | map(attribute='ansible_facts.block_devices') | select('defined') | list }}" - - name: autodisks | lsblk_volumes + - name: disks_auto_generic | lsblk_volumes debug: msg={{ lsblk_volumes }} -- name: 
autodisks | Create 'hostvols' fact that contains a list of available host devices (lsblk) mapped to the mountpoints defined in cluster_vars. Allow for multiple disks with same size. +- name: disks_auto_generic | Create 'hostvols' fact that contains a list of available host devices (lsblk) mapped to the mountpoints defined in cluster_vars. Allow for multiple disks with same size. set_fact: hostvols: | {% set res = [] -%} @@ -31,14 +33,13 @@ {%- set blkvolloop.break = true -%} {%- endif -%} {%- endfor -%} - {%- endfor -%} {{ res }} -#- name: autodisks | hostvols +#- name: disks_auto_generic | hostvols # debug: msg={{hostvols}} # Create partition-less filesystems. -- name: autodisks | Create filesystem(s) on attached volume(s) +- name: disks_auto_generic | Create filesystem(s) on attached volume(s) become: yes filesystem: fstype: "{{ item.fstype }}" @@ -50,7 +51,7 @@ delay: 1 until: created_filesystem is not failed -- name: autodisks | Mount created filesytem(s) persistently +- name: disks_auto_generic | Mount created filesytem(s) persistently become: yes mount: path: "{{ item.mountpoint }}" @@ -60,7 +61,7 @@ opts: _netdev with_items: "{{ hostvols }}" -- name: autodisks | change ownership of mountpoint (if set) +- name: disks_auto_generic | change ownership of mountpoint (if set) become: yes file: path: "{{ item.mountpoint }}" diff --git a/config/tasks/main.yml b/config/tasks/main.yml index e842014a..5a23db5e 100644 --- a/config/tasks/main.yml +++ b/config/tasks/main.yml @@ -52,13 +52,13 @@ mode: 0755 when: (static_journal is defined and static_journal|bool) - # Run this *before* the general auto_volumes tasks, because we need them to be eliminated before we try to mount the other disks. -- name: Attach nvme_volumes - include_tasks: disks_auto_aws_nvme.yml +- name: Create partition table, format and attach volumes - AWS + include_tasks: disks_auto_aws.yml when: cluster_vars.type == "aws" -- name: Attach auto_volumes - include_tasks: disks_auto.yml +- name: Create partition table, format and attach volumes - generic + include_tasks: disks_auto_generic.yml + when: cluster_vars.type != "aws" - name: install prometheus node exporter daemon include_tasks: prometheus_node_exporter.yml diff --git a/create/tasks/aws.yml b/create/tasks/aws.yml index 65192a18..4aee2484 100644 --- a/create/tasks/aws.yml +++ b/create/tasks/aws.yml @@ -1,5 +1,8 @@ --- +- name: cluster_hosts_target_denormalised_by_volume + debug: msg="{{cluster_hosts_target_denormalised_by_volume}}" + - name: create/aws | Create AWS security group ec2_group: name: "{{ cluster_name }}-sg" @@ -45,6 +48,7 @@ wait: yes instance_tags: "{{ _instance_tags | combine(cluster_vars.custom_tagslabels | default({})) }}" termination_protection: "{{cluster_vars[buildenv].termination_protection}}" + volumes: "{{ item.auto_volumes | selectattr('src', 'undefined') | list | default([]) }}" count_tag: { Name: "{{item.hostname}}" } exact_count: 1 vars: @@ -79,7 +83,7 @@ set_fact: cluster_hosts_created: "{{ r__async_status__ec2.results | json_query(\"[?changed==`true`].item.item\") }}" - - name: create/aws | Create new volumes asynchronously (or attach existing if src is present, e.g. via the _scheme_rmvm_keepdisk_rollback scheme) + - name: create/aws | Attach (or create) volumes where 'src' is present (e.g. 
inserted as part of _scheme_rmvm_keepdisk_rollback scheme) ec2_vol: aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" @@ -92,7 +96,7 @@ volume_size: "{%- if 'src' not in item.auto_volume -%}{{item.auto_volume.volume_size}}{%- endif -%}" volume_type: "{{item.auto_volume.volume_type}}" delete_on_termination: yes - loop: "{{ cluster_hosts_target_denormalised_by_volume }}" + loop: "{{ cluster_hosts_target_denormalised_by_volume| selectattr('src', 'defined') | list }}" async: 7200 poll: 0 register: r__ec2_vol @@ -105,6 +109,9 @@ retries: 300 with_items: "{{r__ec2_vol.results}}" +# - name: create/aws | r__async_status__ec2_vol +# debug: msg={{r__async_status__ec2_vol}} + - name: create/aws | Tag the EBS volumes block: @@ -138,6 +145,7 @@ {{ res }} _tags: Name: "{{ item.hostname }}--{{item.device_name | regex_replace('^.*\\/(.*)', '\\1')}}" + device_name: "{{item.device_name}}" inv_node_version: "{{cluster_vars[buildenv].hosttype_vars[item.hosttype].version | default(omit)}}" inv_node_type: "{{item.hosttype}}" owner: "{{ lookup('env','USER') | lower }}" diff --git a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml index 29ee5ac6..728a5618 100644 --- a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml +++ b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml @@ -5,7 +5,7 @@ - assert: { that: "non_current_hosts | length == 0", msg: "ERROR - All VMs must be in the 'current' lifecycle_state. Those not [{{non_current_hosts | join(',')}}]" } vars: non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" - when: (canary=="start" or canary=="none") and (ignore_lifecycle_check is not defined or ignore_lifecycle_check==false) + when: canary=="start" or canary=="none" - name: Redeploy by replacing entire cluster; rollback on fail diff --git a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml index 08f668a9..2a71a918 100644 --- a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml +++ b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml @@ -5,7 +5,7 @@ - assert: { that: "non_current_hosts | length == 0", msg: "ERROR - All VMs must be in the 'current' lifecycle_state. 
Those not [{{non_current_hosts | join(',')}}]" } vars: non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" - when: (canary=="start" or canary=="none") and (ignore_lifecycle_check is not defined or ignore_lifecycle_check==false) + when: canary=="start" or canary=="none" - name: Redeploy by hosttype; rollback on fail diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml index 20bde44d..07db0737 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml @@ -10,8 +10,8 @@ region: "{{cluster_vars.region}}" register: r__ec2_instance_info -#- name: _get_diskinfo_aws | r__ec2_instance_info -# debug: msg={{r__ec2_instance_info}} +- name: _get_diskinfo_aws | r__ec2_instance_info + debug: msg={{r__ec2_instance_info}} - name: _get_diskinfo_aws | augment cluster_hosts_target auto_volumes with source disk info set_fact: @@ -21,7 +21,7 @@ {%- for chs_host_info_result in r__ec2_instance_info.instances | selectattr('tags.lifecycle_state', '!=', 'current') -%} {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == chs_host_info_result.tags.Name | regex_replace('-(?!.*-).*') -%} {%- for chs_host_diskinfo in chs_host_info_result.block_device_mappings | selectattr('device_name', '==', cht_autovol.device_name) -%} - {%- set _ = cht_autovol.update({'src': {'instance_id': chs_host_info_result.instance_id, 'volume_id': chs_host_diskinfo.ebs.volume_id }}) -%} + {%- set _ = cht_autovol.update({'src': {'instance_id': chs_host_info_result.instance_id, 'device_name': chs_host_diskinfo.device_name, 'volume_id': chs_host_diskinfo.ebs.volume_id }}) -%} {%- endfor -%} {%- endif -%} {%- endfor -%} diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml index 600f955d..f04a2b13 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml @@ -3,6 +3,8 @@ - name: Preflight check block: - block: + - assert: { that: "cluster_vars.type != 'gcp'", fail_msg: "This scheme is not supported on GCP." } + - name: Preflight check | get ec2_instance_info for current disk information ec2_instance_info: filters: { "instance-state-name": [ "running", "stopped" ], "tag:cluster_name": "{{cluster_name}}", "tag:lifecycle_state": "current" } @@ -11,10 +13,10 @@ region: "{{cluster_vars.region}}" register: r__ec2_instance_info - - assert: { that: "_invalid_disks | length == 0", fail_msg: "EBS disks with a device_name of /dev/sd[b-e] cannot be reattached to a new instance (an AWS limitation) [found on: {{ _invalid_disks | join(',')}}]. To replace these, you must use a redeploy scheme that copies the disks." } + - assert: { that: "_invalid_disks | length == 0", fail_msg: "Disks cannot be attached to /dev/sd[b-e] after the instance has been created (these are supposed to be ephemeral mounts only, so can only exist if created with the VM). [Found on: {{ _invalid_disks | join(',')}}]. 
If you have EBS disks, you'll need to move them to another mount point (a redeploy scheme that replaces the disks will do this" } vars: { _invalid_disks: "{{ r__ec2_instance_info.instances | json_query(\"[?block_device_mappings[?contains(`/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde`, device_name)]].tags.Name\") }}" } - - assert: { that: "ec2_disks_is_subset_of_target_disks|bool==false", fail_msg: "Existing EBS disks must have be same as (or a subset of) the auto_volumes defined in your cluster_vars definition.", success_msg: "Success - Existing EBS disks are the same as (or a subset of) the auto_volumes defined in your cluster_vars definition" } + - assert: { that: "ec2_disks_is_subset_of_target_disks|bool==false", fail_msg: "Existing EBS disks must be the same as (or a subset of) the auto_volumes defined in your cluster_vars definition.", success_msg: "Success - Existing EBS disks are the same as (or a subset of) the auto_volumes defined in your cluster_vars definition" } vars: ec2_disks_is_subset_of_target_disks: |- {%- set testloop = namespace(is_not_subset=false) -%} @@ -35,6 +37,6 @@ - assert: { that: "non_current_hosts | length == 0", fail_msg: "ERROR - All VMs must be in the 'current' lifecycle_state. Those not [{{non_current_hosts | join(',')}}]" } vars: { non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" } - when: (canary=="start" or canary=="none") and (ignore_lifecycle_check is not defined or ignore_lifecycle_check==false) + when: (canary=="start" or canary=="none") - assert: { that: "(cluster_hosts_state | selectattr('tagslabels.lifecycle_state', '==', 'current') | list | length) == (cluster_hosts_target | length)", fail_msg: "Cannot use this scheme to redeploy to a different-sized cluster" } From 81e8f2e03c2fedb9354f338f24c66925628bb6f6 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Fri, 2 Oct 2020 21:28:59 +0100 Subject: [PATCH 13/58] [FIX] Fix the logic that writes the dynamic inventory to file (#71) --- EXAMPLE/README.md | 4 ++-- EXAMPLE/ansible.cfg | 1 + EXAMPLE/cluster.yml | 12 ++++++------ EXAMPLE/group_vars/_skel/cluster_vars.yml | 12 +++++++----- EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml | 5 +++-- EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml | 5 +++-- _dependencies/defaults/main.yml | 3 --- config/tasks/metricbeat.yml | 0 dynamic_inventory/tasks/main.yml | 4 ++-- 9 files changed, 24 insertions(+), 22 deletions(-) mode change 100755 => 100644 config/tasks/metricbeat.yml diff --git a/EXAMPLE/README.md b/EXAMPLE/README.md index d20fc2f9..6f2e6c9c 100644 --- a/EXAMPLE/README.md +++ b/EXAMPLE/README.md @@ -1,7 +1,7 @@ # clusterverse-example   [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) ![PRs Welcome](https://img.shields.io/badge/PRs-Welcome-brightgreen.svg) This is an example of a deployment of [clusterverse](https://github.com/sky-uk/clusterverse) - _the full-lifecycle cloud infrastructure cluster management project, using Ansible._ -_**Please refer to the full [README.md](https://github.com/sky-uk/clusterverse/blob/master/README.md) in the main [clusterverse](https://github.com/sky-uk/clusterverse) repository.**_ +_**Please refer to the full [README.md](https://github.com/sky-uk/clusterverse/blob/master/README.md) in the main [clusterverse](https://github.com/sky-uk/clusterverse) repository.**_ ## Contributing Contributions are welcome and encouraged. 
Please see [CONTRIBUTING.md](https://github.com/sky-uk/clusterverse/blob/master/CONTRIBUTING.md) for details. @@ -54,7 +54,7 @@ ansible-playbook -u ubuntu --private-key=/home//.ssh/ cluster.yml ### GCP: ``` ansible-playbook -u --private-key=/home//.ssh/ cluster.yml -e buildenv=sandbox -e clusterid=vtp_gcp_euw1 --vault-id=sandbox@.vaultpass-client.py -ansible-playbook -u --private-key=/home//.ssh/ cluster.yml -e buildenv=sandbox -e clusterid=vtp_gcp_euw1 --vault-id=sandbox@.vaultpass-client.py --tags=clusterverse_clean -e clean=_all_ -e release_version=v1.0. +ansible-playbook -u --private-key=/home//.ssh/ cluster.yml -e buildenv=sandbox -e clusterid=vtp_gcp_euw1 --vault-id=sandbox@.vaultpass-client.py --tags=clusterverse_clean -e clean=_all_ -e release_version=v1.0.1 ansible-playbook -u --private-key=/home//.ssh/ cluster.yml -e buildenv=sandbox -e clusterid=vtp_gcp_euw1 --vault-id=sandbox@.vaultpass-client.py -e clean=_all_ ``` diff --git a/EXAMPLE/ansible.cfg b/EXAMPLE/ansible.cfg index 2bef101b..f73b5ea9 100644 --- a/EXAMPLE/ansible.cfg +++ b/EXAMPLE/ansible.cfg @@ -11,5 +11,6 @@ interpreter_python = auto [ssh_connection] retries=5 ssh_args = -o 'UserKnownHostsFile=/dev/null' -o 'ControlMaster=auto' -o 'ControlPersist=60s' +#ssh_args = -o 'UserKnownHostsFile=/dev/null' -o 'ControlMaster=auto' -o 'ControlPersist=60s' -o ProxyCommand="ssh -i -W %h:%p -q @>" ##To use with bastion pipelining = True control_path_dir=/tmp/.ansible/cp \ No newline at end of file diff --git a/EXAMPLE/cluster.yml b/EXAMPLE/cluster.yml index 762155b1..5df25bd9 100644 --- a/EXAMPLE/cluster.yml +++ b/EXAMPLE/cluster.yml @@ -7,24 +7,24 @@ tasks: - { name: "Get dependent roles via ansible-galaxy", local_action: "command ansible-galaxy install -fr requirements.yml", tags: ["always"] } - - { include_role: { name: "clusterverse/clean", apply: {tags: &roletag_clean ["clusterverse_clean"]} }, tags: *roletag_clean, when: "clean is defined" } - - { include_role: { name: "clusterverse/create", apply: {tags: &roletag_create ["clusterverse_create"]} }, tags: *roletag_create } - - { include_role: { name: "clusterverse/dynamic_inventory", apply: {tags: &roletag_dynamic_inventory ["clusterverse_dynamic_inventory"]} }, tags: *roletag_dynamic_inventory } + - { include_role: { name: "clusterverse/clean", apply: { tags: ["clusterverse_clean"]} }, tags: ["clusterverse_clean"], when: "clean is defined" } + - { include_role: { name: "clusterverse/create", apply: { tags: ["clusterverse_create"]} }, tags: ["clusterverse_create"] } + - { include_role: { name: "clusterverse/dynamic_inventory", apply: { tags: ["clusterverse_dynamic_inventory"]} }, tags: ["clusterverse_dynamic_inventory"] } - name: Configure the cluster hosts: all tasks: - - { include_role: { name: "clusterverse/config", apply: {tags: &roletag_config ["clusterverse_config"]} }, tags: *roletag_config } + - { include_role: { name: "clusterverse/config", apply: { tags: ["clusterverse_config"]} }, tags: ["clusterverse_config"] } ## Application roles - name: Application roles hosts: all tasks: - - { include_role: { name: "testrole", apply: {tags: &roletag_testrole ["testrole"]} }, tags: *roletag_testrole } + - { include_role: { name: "testrole", apply: { tags: ["testrole"]} }, tags: ["testrole"] } ## - name: Perform cluster readiness operations hosts: localhost connection: local tasks: - - { include_role: { name: "clusterverse/readiness", apply: {tags: &roletag_readiness ["clusterverse_readiness"]} }, tags: *roletag_readiness } + - { include_role: { name: "clusterverse/readiness", 
apply: { tags: ["clusterverse_readiness"]} }, tags: ["clusterverse_readiness"] } diff --git a/EXAMPLE/group_vars/_skel/cluster_vars.yml b/EXAMPLE/group_vars/_skel/cluster_vars.yml index c6ebfbde..5f2029f9 100644 --- a/EXAMPLE/group_vars/_skel/cluster_vars.yml +++ b/EXAMPLE/group_vars/_skel/cluster_vars.yml @@ -11,7 +11,7 @@ beats_config: filebeat: # output_logstash_hosts: ["localhost:5044"] # The destination hosts for filebeat-gathered logs # extra_logs_paths: # The array is optional, if you need to add more paths or files to scrape for logs -# - /var/log/myapp/*.log +# - /var/log/myapp/*.log metricbeat: # output_logstash_hosts: ["localhost:5044"] # The destination hosts for metricbeat-gathered metrics @@ -42,7 +42,8 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within ### AWS example #cluster_vars: # type: &cloud_type "aws" -# image: "ami-0964eb2dc8b836eb6" # eu-west-1, 18.04, amd64, hvm-ssd, 20200430. Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ +# image: "ami-0c4c42893066a139e" # eu-west-1, 20.04, amd64, hvm-ssd, 20200924. Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ +# image: "ami-06868ad5a3642e4d7" # eu-west-1, 18.04, amd64, hvm-ssd, 20200923. Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ # region: ®ion "eu-west-1" # eu-west-1, us-west-2 # dns_cloud_internal_domain: "{{_region}}.compute.internal" # The cloud-internal zone as defined by the cloud provider (e.g. GCP, AWS) # dns_nameserver_zone: &dns_nameserver_zone "" # The zone that dns_server will operate on. gcloud dns needs a trailing '.'. Leave blank if no external DNS (use IPs only) @@ -89,10 +90,11 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within ### GCP example #cluster_vars: # type: &cloud_type "gcp" -# image: "projects/ubuntu-os-cloud/global/images/ubuntu-1804-bionic-v20200430" # Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ +# image: "projects/ubuntu-os-cloud/global/images/ubuntu-2004-focal-v20200917" # Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ +# image: "projects/ubuntu-os-cloud/global/images/ubuntu-1804-bionic-v20200923" # Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ # region: ®ion "europe-west1" -# dns_cloud_internal_domain: "c.{{gcp_credentials_json.project_id}}.internal" # The cloud-internal zone as defined by the cloud provider (e.g. GCP, AWS) -# dns_nameserver_zone: &dns_nameserver_zone "" # The zone that dns_server will operate on. gcloud dns needs a trailing '.'. Leave blank if no external DNS (use IPs only) +# dns_cloud_internal_domain: "c.{{gcp_credentials_json.project_id}}.internal" # The cloud-internal zone as defined by the cloud provider (e.g. GCP, AWS) +# dns_nameserver_zone: &dns_nameserver_zone "" # The zone that dns_server will operate on. gcloud dns needs a trailing '.'. Leave blank if no external DNS (use IPs only) # dns_user_domain: "{%- if _dns_nameserver_zone -%}CUSTOM.PREFIXES.{{_dns_nameserver_zone}}{%- endif -%}" # A user-defined _domain_ part of the FDQN, (if more prefixes are required before the dns_nameserver_zone) # dns_server: "" # Specify DNS server. nsupdate, route53 or clouddns. If empty string is specified, no DNS will be added. 
# assign_public_ip: "yes" diff --git a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml index a396a0bc..e262bf7d 100644 --- a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml @@ -11,7 +11,7 @@ beats_config: filebeat: # output_logstash_hosts: ["localhost:5044"] # The destination hosts for filebeat-gathered logs # extra_logs_paths: # The array is optional, if you need to add more paths or files to scrape for logs -# - /var/log/myapp/*.log +# - /var/log/myapp/*.log metricbeat: # output_logstash_hosts: ["localhost:5044"] # The destination hosts for metricbeat-gathered metrics @@ -41,7 +41,8 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within cluster_vars: type: &cloud_type "aws" - image: "ami-0964eb2dc8b836eb6" # eu-west-1, 18.04, amd64, hvm-ssd, 20200430. Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ + image: "ami-0c4c42893066a139e" # eu-west-1, 20.04, amd64, hvm-ssd, 20200924. Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ +# image: "ami-06868ad5a3642e4d7" # eu-west-1, 18.04, amd64, hvm-ssd, 20200923. Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ region: ®ion "eu-west-1" # eu-west-1, us-west-2 dns_cloud_internal_domain: "{{_region}}.compute.internal" # The cloud-internal zone as defined by the cloud provider (e.g. GCP, AWS) dns_nameserver_zone: &dns_nameserver_zone "" # The zone that dns_server will operate on. gcloud dns needs a trailing '.'. Leave blank if no external DNS (use IPs only) diff --git a/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml index 02631539..4b08de80 100644 --- a/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml @@ -15,7 +15,7 @@ beats_config: filebeat: # output_logstash_hosts: ["localhost:5044"] # The destination hosts for filebeat-gathered logs # extra_logs_paths: # The array is optional, if you need to add more paths or files to scrape for logs -# - /var/log/myapp/*.log +# - /var/log/myapp/*.log metricbeat: # output_logstash_hosts: ["localhost:5044"] # The destination hosts for metricbeat-gathered metrics @@ -45,7 +45,8 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within cluster_vars: type: &cloud_type "gcp" - image: "projects/ubuntu-os-cloud/global/images/ubuntu-1804-bionic-v20200430" # Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ + image: "projects/ubuntu-os-cloud/global/images/ubuntu-2004-focal-v20200917" # Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ +# image: "projects/ubuntu-os-cloud/global/images/ubuntu-1804-bionic-v20200923" # Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ region: ®ion "europe-west1" dns_cloud_internal_domain: "c.{{gcp_credentials_json.project_id}}.internal" # The cloud-internal zone as defined by the cloud provider (e.g. GCP, AWS) dns_nameserver_zone: &dns_nameserver_zone "" # The zone that dns_server will operate on. gcloud dns needs a trailing '.'. 
Leave blank if no external DNS (use IPs only) diff --git a/_dependencies/defaults/main.yml b/_dependencies/defaults/main.yml index 1022c823..bb508201 100644 --- a/_dependencies/defaults/main.yml +++ b/_dependencies/defaults/main.yml @@ -3,9 +3,6 @@ # Identifies the application version that is being deployed (optional) release_version: "" -# Default redeploy scheme -redeploy_scheme: _scheme_addnewvm_rmdisk_rollback - # Default Prometheus node exporter configurations prometheus_node_exporter_install: true # Whether to install the prometheus node_exporter tool prometheus_node_exporter_port: "19100" # Port to export metrics to. The default (9100), conflicts with a Couchbase port, and prevents couchbase working. diff --git a/config/tasks/metricbeat.yml b/config/tasks/metricbeat.yml old mode 100755 new mode 100644 diff --git a/dynamic_inventory/tasks/main.yml b/dynamic_inventory/tasks/main.yml index 4c041104..06a7d102 100644 --- a/dynamic_inventory/tasks/main.yml +++ b/dynamic_inventory/tasks/main.yml @@ -22,7 +22,7 @@ - name: dynamic_inventory | Add hosts to dynamic inventory add_host: name: "{{ item.hostname }}" - groups: "{{ item.hosttype }},{{ cluster_name }},{{ clusterid }}{%- if 'regionzone' in item -%},{{ item.regionzone }}{%- endif -%}" + groups: "{{ item.hosttype }},{{ cluster_name }},{{ clusterid }}{% if 'regionzone' in item %},{{ item.regionzone }}{% endif %}" ansible_host: "{{ item.inventory_ip }}" hosttype: "{{ item.hosttype }}" regionzone: "{{ item.regionzone | default(omit) }}" @@ -40,7 +40,7 @@ {% if groupname not in ["all", "ungrouped"] -%} [{{ groupname }}] {% for hostname in groups[groupname] %} - {{ hostname }} ansible_host={{hostvars[hostname].ansible_host}} hosttype={{ hostvars[hostname].hosttype }} {%- if 'regionzone' in hostvars[hostname] -%}regionzone={{ hostvars[hostname].regionzone }}{%- endif -%} + {{ hostname }} ansible_host={{hostvars[hostname].ansible_host}} hosttype={{ hostvars[hostname].hosttype }} {% if 'regionzone' in hostvars[hostname] %}regionzone={{ hostvars[hostname].regionzone }}{% endif %}{{''}} {% endfor %} {% endif %} From 82468c14a85cfb05a13098737e7278eae55fbce6 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Mon, 5 Oct 2020 17:25:04 +0100 Subject: [PATCH 14/58] Update ebsmap.py to support NVME instance stores --- EXAMPLE/group_vars/_skel/cluster_vars.yml | 13 +++++---- .../group_vars/test_aws_euw1/cluster_vars.yml | 11 ++++---- _dependencies/library/ebsmap.py | 27 +++++++++++++++++-- _dependencies/library/ebsmap__LICENSE | 24 +++++++++++++++++ 4 files changed, 60 insertions(+), 15 deletions(-) diff --git a/EXAMPLE/group_vars/_skel/cluster_vars.yml b/EXAMPLE/group_vars/_skel/cluster_vars.yml index 4f004bd4..5a1c2b0a 100644 --- a/EXAMPLE/group_vars/_skel/cluster_vars.yml +++ b/EXAMPLE/group_vars/_skel/cluster_vars.yml @@ -71,15 +71,14 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within # rule_desc: "Access from all VMs attached to the {{ cluster_name }}-sg group" # sandbox: # hosttype_vars: -# sys: {vms_by_az: {a: 1, b: 1, c: 1}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} +# sys: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} # #sysdisks2: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", 
group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} # #sysdisks3: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} -# #sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} -# #hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}] } } -# #hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } -# #hosthdd_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } -# #hosthdd_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } -# #hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, ephemeral: 0, encrypted: True, "delete_on_termination": true}]} +# #sysdisks-snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": 
"57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/media/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} +# #hostnvme-multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {"device_name": "/dev/sdf", mountpoint: "/media/mysvc8", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }] } +# #hostnvme-lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# #hosthdd-multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } +# #hosthdd-lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } # aws_access_key: "" # aws_secret_key: "" # vpc_name: "test{{buildenv}}" diff --git a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml index 810f4be2..e063d8c8 100644 --- a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml @@ -81,12 +81,11 @@ cluster_vars: sys: {vms_by_az: {a: 1, b: 1, c: 1}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} # sysdisks2: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} # sysdisks3: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, 
{"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} -# sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} -# hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}] } } -# hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } -# hosthdd_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } -# hosthdd_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } -# hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, ephemeral: 0, encrypted: True, "delete_on_termination": true}]} +# sysdisks-snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/media/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} +# hostnvme-multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: 
ephemeral1}, {"device_name": "/dev/sdf", mountpoint: "/media/mysvc8", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }] } +# hostnvme-lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# hosthdd-multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } +# hosthdd-lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } aws_access_key: "" aws_secret_key: "" vpc_name: "test{{buildenv}}" diff --git a/_dependencies/library/ebsmap.py b/_dependencies/library/ebsmap.py index dbd6d717..0cb47d6a 100644 --- a/_dependencies/library/ebsmap.py +++ b/_dependencies/library/ebsmap.py @@ -110,6 +110,11 @@ except: pass +try: + from urllib.request import urlopen +except ImportError: + from urllib2 import urlopen + NVME_ADMIN_IDENTIFY = 0x06 NVME_IOCTL_ADMIN_CMD = 0xC0484E41 AMZN_NVME_VID = 0x1D0F @@ -243,10 +248,22 @@ def fail_json(self, msg): module = cDummyAnsibleModule() # Get all existing block volumes by key=value, then parse this into a dictionary (which excludes non disk and partition block types, e.g. ram, loop). - lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,PARTLABEL,MOUNTPOINT,SERIAL', '-P']).decode().rstrip().split('\n') + lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,MOUNTPOINT,MODEL,SERIAL', '-P']).decode().rstrip().split('\n') os_device_names = [dict((map(lambda x: x.strip("\""), sub.split("="))) for sub in dev.split('\" ') if '=' in sub) for dev in lsblk_devices] os_device_names = [dev for dev in os_device_names if dev['TYPE'] in ['disk', 'part', 'lvm']] - + os_device_names.sort(key=lambda k: k['NAME']) + + # Instance stores (AKA ephemeral volumes) do not appear to have a defined endpoint that maps between the /dev/sd[b-e] defined in the instance creation map, and the OS /dev/nvme[0-26]n1 device. + # For this scenario, we can only return the instance stores in the order that they are defined. Because instance stores do not survive a poweroff and cannot be detached and reattached, the order doesn't matter as much. 
+ instance_store_map = [] + with urlopen("http://169.254.169.254/latest/meta-data/block-device-mapping/") as response__block_device_mapping: + block_device_mappings = response__block_device_mapping.read().decode().split("\n") + for block_device_mappings__ephemeral_id in [dev for dev in block_device_mappings if dev.startswith('ephemeral')]: + with urlopen("http://169.254.169.254/latest/meta-data/block-device-mapping/" + block_device_mappings__ephemeral_id) as response__ephemeral_device: + block_device_mappings__ephemeral_mapped = response__ephemeral_device.read().decode() + instance_store_map.append({'ephemeral_id': block_device_mappings__ephemeral_id, 'ephemeral_map': block_device_mappings__ephemeral_mapped}) + + instance_store_count = 0 for os_device in os_device_names: os_device_path = "/dev/" + os_device['NAME'] if os_device['NAME'].startswith("nvme"): @@ -254,6 +271,12 @@ def fail_json(self, msg): dev = ebs_nvme_device(os_device_path) except FileNotFoundError as e: module.fail_json(msg=os_device_path + ": FileNotFoundError" + str(e)) + except TypeError as e: + if instance_store_count < len(instance_store_map): + os_device.update({"device_name_os": os_device_path, "device_name_aws": '/dev/' + instance_store_map[instance_store_count]['ephemeral_map'], "volume_id": dev.get_volume_id()}) + instance_store_count += 1 + else: + module.warn(u"%s is not an EBS device and there is no instance store mapping." % os_device_path) except OSError as e: module.warn(u"%s is not an nvme device." % os_device_path) else: diff --git a/_dependencies/library/ebsmap__LICENSE b/_dependencies/library/ebsmap__LICENSE index 3c642ec5..fb891162 100644 --- a/_dependencies/library/ebsmap__LICENSE +++ b/_dependencies/library/ebsmap__LICENSE @@ -27,3 +27,27 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +--- + +Copyright 2017 Amazon.com, Inc. and its affiliates. All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is furnished +to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ From 2d75c5065aa40f03e3731594795a932fc54539ff Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Mon, 5 Oct 2020 17:29:04 +0100 Subject: [PATCH 15/58] Update ebsmap.py to support NVME instance stores --- EXAMPLE/group_vars/_skel/cluster_vars.yml | 13 +++++---- .../group_vars/test_aws_euw1/cluster_vars.yml | 11 ++++---- _dependencies/library/ebsmap.py | 27 +++++++++++++++++-- 3 files changed, 36 insertions(+), 15 deletions(-) diff --git a/EXAMPLE/group_vars/_skel/cluster_vars.yml b/EXAMPLE/group_vars/_skel/cluster_vars.yml index e1081886..2e9a5616 100644 --- a/EXAMPLE/group_vars/_skel/cluster_vars.yml +++ b/EXAMPLE/group_vars/_skel/cluster_vars.yml @@ -71,15 +71,14 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within # rule_desc: "Access from all VMs attached to the {{ cluster_name }}-sg group" # sandbox: # hosttype_vars: -# sys: {vms_by_az: {a: 1, b: 1, c: 1}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} +# sys: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} # #sysdisks2: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} # #sysdisks3: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} -# #sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} -# #hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}] } } -# #hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } -# #hosthdd_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: 
[{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } -# #hosthdd_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } -# #hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, ephemeral: 0, encrypted: True, "delete_on_termination": true}]} +# #sysdisks-snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/media/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} +# #hostnvme-multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {"device_name": "/dev/sdf", mountpoint: "/media/mysvc8", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }] } +# #hostnvme-lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# #hosthdd-multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } +# #hosthdd-lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: 
{vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } # aws_access_key: "" # aws_secret_key: "" # vpc_name: "test{{buildenv}}" diff --git a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml index 810f4be2..e063d8c8 100644 --- a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml @@ -81,12 +81,11 @@ cluster_vars: sys: {vms_by_az: {a: 1, b: 1, c: 1}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} # sysdisks2: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} # sysdisks3: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} -# sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} -# hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}] } } -# hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } -# hosthdd_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } -# hosthdd_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", 
"volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } -# hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, ephemeral: 0, encrypted: True, "delete_on_termination": true}]} +# sysdisks-snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/media/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} +# hostnvme-multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {"device_name": "/dev/sdf", mountpoint: "/media/mysvc8", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }] } +# hostnvme-lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# hosthdd-multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } +# hosthdd-lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } aws_access_key: "" aws_secret_key: "" vpc_name: "test{{buildenv}}" diff --git a/_dependencies/library/ebsmap.py b/_dependencies/library/ebsmap.py index dbd6d717..0cb47d6a 100644 --- a/_dependencies/library/ebsmap.py +++ b/_dependencies/library/ebsmap.py @@ -110,6 +110,11 @@ except: pass +try: + from urllib.request import urlopen +except ImportError: + from urllib2 import urlopen + NVME_ADMIN_IDENTIFY = 0x06 NVME_IOCTL_ADMIN_CMD = 0xC0484E41 AMZN_NVME_VID = 0x1D0F @@ -243,10 +248,22 @@ def fail_json(self, msg): 
module = cDummyAnsibleModule() # Get all existing block volumes by key=value, then parse this into a dictionary (which excludes non disk and partition block types, e.g. ram, loop). - lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,PARTLABEL,MOUNTPOINT,SERIAL', '-P']).decode().rstrip().split('\n') + lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,MOUNTPOINT,MODEL,SERIAL', '-P']).decode().rstrip().split('\n') os_device_names = [dict((map(lambda x: x.strip("\""), sub.split("="))) for sub in dev.split('\" ') if '=' in sub) for dev in lsblk_devices] os_device_names = [dev for dev in os_device_names if dev['TYPE'] in ['disk', 'part', 'lvm']] - + os_device_names.sort(key=lambda k: k['NAME']) + + # Instance stores (AKA ephemeral volumes) do not appear to have a defined endpoint that maps between the /dev/sd[b-e] defined in the instance creation map, and the OS /dev/nvme[0-26]n1 device. + # For this scenario, we can only return the instance stores in the order that they are defined. Because instance stores do not survive a poweroff and cannot be detached and reattached, the order doesn't matter as much. + instance_store_map = [] + with urlopen("http://169.254.169.254/latest/meta-data/block-device-mapping/") as response__block_device_mapping: + block_device_mappings = response__block_device_mapping.read().decode().split("\n") + for block_device_mappings__ephemeral_id in [dev for dev in block_device_mappings if dev.startswith('ephemeral')]: + with urlopen("http://169.254.169.254/latest/meta-data/block-device-mapping/" + block_device_mappings__ephemeral_id) as response__ephemeral_device: + block_device_mappings__ephemeral_mapped = response__ephemeral_device.read().decode() + instance_store_map.append({'ephemeral_id': block_device_mappings__ephemeral_id, 'ephemeral_map': block_device_mappings__ephemeral_mapped}) + + instance_store_count = 0 for os_device in os_device_names: os_device_path = "/dev/" + os_device['NAME'] if os_device['NAME'].startswith("nvme"): @@ -254,6 +271,12 @@ def fail_json(self, msg): dev = ebs_nvme_device(os_device_path) except FileNotFoundError as e: module.fail_json(msg=os_device_path + ": FileNotFoundError" + str(e)) + except TypeError as e: + if instance_store_count < len(instance_store_map): + os_device.update({"device_name_os": os_device_path, "device_name_aws": '/dev/' + instance_store_map[instance_store_count]['ephemeral_map'], "volume_id": dev.get_volume_id()}) + instance_store_count += 1 + else: + module.warn(u"%s is not an EBS device and there is no instance store mapping." % os_device_path) except OSError as e: module.warn(u"%s is not an nvme device." 
% os_device_path) else: From e0bf912381d63ca5a632608b3348ed3c746d7e55 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Mon, 5 Oct 2020 18:33:41 +0100 Subject: [PATCH 16/58] Fix reattach syntax --- create/tasks/aws.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/create/tasks/aws.yml b/create/tasks/aws.yml index 4aee2484..792dcaec 100644 --- a/create/tasks/aws.yml +++ b/create/tasks/aws.yml @@ -30,8 +30,7 @@ region: "{{cluster_vars.region}}" id: "{{item.auto_volume.src.volume_id}}" instance: None - loop: "{{ cluster_hosts_target_denormalised_by_volume }}" - when: "'src' in item.auto_volume" + loop: "{{ cluster_hosts_target_denormalised_by_volume | selectattr('auto_volume.src', 'defined') | list }}" - name: create/aws | Create EC2 VMs asynchronously ec2: @@ -96,7 +95,7 @@ volume_size: "{%- if 'src' not in item.auto_volume -%}{{item.auto_volume.volume_size}}{%- endif -%}" volume_type: "{{item.auto_volume.volume_type}}" delete_on_termination: yes - loop: "{{ cluster_hosts_target_denormalised_by_volume| selectattr('src', 'defined') | list }}" + loop: "{{ cluster_hosts_target_denormalised_by_volume | selectattr('auto_volume.src', 'defined') | list }}" async: 7200 poll: 0 register: r__ec2_vol From 9d2cdb9229b5a3c2415b968a664cad4827a352ca Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Tue, 6 Oct 2020 10:25:57 +0100 Subject: [PATCH 17/58] Remove debug that is invalid during redeploy --- config/tasks/disks_auto_aws.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/config/tasks/disks_auto_aws.yml b/config/tasks/disks_auto_aws.yml index bc2f8859..b3626846 100644 --- a/config/tasks/disks_auto_aws.yml +++ b/config/tasks/disks_auto_aws.yml @@ -3,10 +3,6 @@ - name: disks_auto_aws | auto_volumes debug: msg={{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].auto_volumes }} -- name: disks_auto_aws | cluster_hosts_target(inventory_hostname) - debug: msg={{ (cluster_hosts_target | selectattr('hostname', '==', inventory_hostname) | list | first)['auto_volumes'] }} - - - name: disks_auto_aws | Mount volumes as individual disks block: - name: disks_auto_aws | Get the nvme information (pre-filesystem create) From 2a2917c7710717074d50c694e9d63a902bb429f1 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Tue, 6 Oct 2020 16:15:21 +0100 Subject: [PATCH 18/58] Add test code to allow creating a file in each mount that identifies where it was supposed to be mounted. 
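The new tasks are gated behind a `test_touch_disks` extra-var (see the `when:` conditions in the diff below), so they only run when explicitly requested. A minimal sketch of enabling them — the buildenv/clusterid values here are illustrative, not prescriptive; use whatever flags you normally pass to cluster.yml:

    ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=test_aws_euw1 -e test_touch_disks=true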
--- config/tasks/disks_auto_aws.yml | 54 +++++++++++++++++++++++++++----- dynamic_inventory/tasks/main.yml | 4 ++- 2 files changed, 49 insertions(+), 9 deletions(-) diff --git a/config/tasks/disks_auto_aws.yml b/config/tasks/disks_auto_aws.yml index b3626846..479982d9 100644 --- a/config/tasks/disks_auto_aws.yml +++ b/config/tasks/disks_auto_aws.yml @@ -1,7 +1,7 @@ --- -- name: disks_auto_aws | auto_volumes - debug: msg={{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].auto_volumes }} +- name: disks_auto_aws | cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype + debug: msg={{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype] }} - name: disks_auto_aws | Mount volumes as individual disks block: @@ -24,10 +24,10 @@ ebsmap: become: yes register: r__ebsmap - + - name: disks_auto_aws | r__ebsmap (post-filesystem create) debug: msg={{r__ebsmap}} - + - name: disks_auto_aws | Mount created filesytem(s) persistently become: yes mount: @@ -37,7 +37,7 @@ state: mounted opts: _netdev loop: "{{auto_vols}}" - + - name: disks_auto_aws | change ownership of mountpoint (if set) become: yes file: @@ -47,6 +47,26 @@ owner: "{{ item.perms.owner | default(omit)}}" group: "{{ item.perms.group | default(omit)}}" loop: "{{auto_vols}}" + + - block: + - name: disks_auto_aws | Touch a file with the mountpoint and device name for testing that disk attachment is correct + become: yes + file: + path: "{{item.mountpoint}}/__clusterversetest_{{ item.mountpoint | regex_replace('\/', '_') }}_{{ item.device_name | regex_replace('\/', '_') }}" + state: touch + loop: "{{auto_vols}}" + + - name: disks_auto_aws | Find all __clusterversetest_ files in newly mounted disks + find: + paths: "{{item.mountpoint}}" + patterns: "__clusterversetest_*" + loop: "{{auto_vols}}" + register: r__find_test + + - name: disks_auto_aws | Display all __clusterversetest_ files in newly mounted disks. 
+ debug: + msg: "{{ r__find_test | json_query(\"results[].{device_name: item.device_name, mountpoint: item.mountpoint, files: files[].path}\") }}" + when: test_touch_disks is defined and test_touch_disks|bool when: (auto_vols | map(attribute='mountpoint') | list | unique | count == auto_vols | map(attribute='mountpoint') | list | count) vars: auto_vols: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].auto_volumes }}" @@ -88,18 +108,36 @@ - name: disks_auto_aws | Create filesystem(s) on attached nvme volume(s) become: yes filesystem: - fstype: "{{ hosttype_vars.auto_volumes | map(attribute='fstype') | list | unique | join('') }}" + fstype: "{{ hosttype_vars.auto_volumes[0].fstype }}" dev: "/dev/{{ hosttype_vars.lvmparams.vg_name }}/{{ hosttype_vars.lvmparams.lv_name }}" force: no - name: disks_auto_aws | Mount created filesytem(s) persistently become: yes mount: - path: "{{ hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | unique | join('') }}" + path: "{{ hosttype_vars.auto_volumes[0].mountpoint }}" src: "/dev/{{ hosttype_vars.lvmparams.vg_name }}/{{ hosttype_vars.lvmparams.lv_name }}" - fstype: "{{ hosttype_vars.auto_volumes | map(attribute='fstype') | list | unique | join('') }}" + fstype: "{{ hosttype_vars.auto_volumes[0].fstype }}" state: mounted opts: _netdev + + - block: + - name: disks_auto_aws | Touch a file with the mountpoint and device name for testing that disk attachment is correct + become: yes + file: + path: "{{ hosttype_vars.auto_volumes[0].mountpoint }}/__clusterversetest_{{ hosttype_vars.auto_volumes[0].mountpoint | regex_replace('\/', '_') }}" + state: touch + + - name: disks_auto_aws | Find all __clusterversetest_ files in newly mounted disks + find: + paths: "{{ hosttype_vars.auto_volumes[0].mountpoint }}" + patterns: "__clusterversetest_*" + register: r__find_test + + - name: disks_auto_aws | Display all __clusterversetest_ files in newly mounted disks. + debug: + msg: "{{ r__find_test | json_query(\"files[].path\") }}" + when: test_touch_disks is defined and test_touch_disks|bool when: ('lvmparams' in hosttype_vars) and (hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | unique | count == 1) and (hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | count >= 2) and (hosttype_vars.auto_volumes | map(attribute='fstype') | list | unique | count == 1) vars: hosttype_vars: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype] }}" diff --git a/dynamic_inventory/tasks/main.yml b/dynamic_inventory/tasks/main.yml index 06a7d102..0c1925a0 100644 --- a/dynamic_inventory/tasks/main.yml +++ b/dynamic_inventory/tasks/main.yml @@ -14,7 +14,9 @@ ping: delegate_to: "{{ item.inventory_ip }}" with_items: "{{ dynamic_inventory_flat }}" - retries: 12 + register: r__ping + until: r__ping is success + retries: 5 - name: dynamic_inventory | Refresh (clean it, because there is no file or plugin inventory defined) the in-memory inventory prior to building it (this is in case this module is called multiple times, and we otherwise only add hosts to existing inventory) meta: refresh_inventory From c9c9598e48f281efac6d013542bb0d47031686f1 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Thu, 22 Oct 2020 16:36:45 +0100 Subject: [PATCH 19/58] Fix missing jinja2 endfor in disks_auto_generic.yml (i.e. 
GCP disk mounting) --- config/tasks/disks_auto_generic.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/config/tasks/disks_auto_generic.yml b/config/tasks/disks_auto_generic.yml index cde01eac..4c30ee1e 100644 --- a/config/tasks/disks_auto_generic.yml +++ b/config/tasks/disks_auto_generic.yml @@ -33,6 +33,7 @@ {%- set blkvolloop.break = true -%} {%- endif -%} {%- endfor -%} + {%- endfor -%} {{ res }} #- name: disks_auto_generic | hostvols From d79737b425467ed3fc2294a772655aec50123d74 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Sun, 25 Oct 2020 12:12:52 +0000 Subject: [PATCH 20/58] Add GCP functionality for _scheme_rmvm_keepdisk_rollback. + Also add disk labelling for GCP. --- _dependencies/library/blockdevmap.py | 347 ++++++++++++++++++ .../{ebsmap__LICENSE => blockdevmap_LICENSE} | 6 + _dependencies/library/blockdevmap_README.md | 44 +++ _dependencies/library/ebsmap.py | 293 --------------- _dependencies/library/ebsmap__README.md | 24 -- .../tasks/get_cluster_hosts_target.yml | 22 +- ...ks_auto_aws.yml => disks_auto_aws_gcp.yml} | 83 +++-- config/tasks/disks_auto_generic.yml | 35 +- config/tasks/main.yml | 12 +- create/tasks/gcp.yml | 137 ++++++- ..._diskinfo_to_cluster_hosts_target__aws.yml | 8 +- ..._diskinfo_to_cluster_hosts_target__gcp.yml | 39 ++ .../tasks/preflight.yml | 2 +- 13 files changed, 639 insertions(+), 413 deletions(-) create mode 100644 _dependencies/library/blockdevmap.py rename _dependencies/library/{ebsmap__LICENSE => blockdevmap_LICENSE} (94%) create mode 100644 _dependencies/library/blockdevmap_README.md delete mode 100644 _dependencies/library/ebsmap.py delete mode 100644 _dependencies/library/ebsmap__README.md rename config/tasks/{disks_auto_aws.yml => disks_auto_aws_gcp.yml} (50%) create mode 100644 redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__gcp.yml diff --git a/_dependencies/library/blockdevmap.py b/_dependencies/library/blockdevmap.py new file mode 100644 index 00000000..3581f437 --- /dev/null +++ b/_dependencies/library/blockdevmap.py @@ -0,0 +1,347 @@ +# Copyright 2020 Dougal Seeley +# https://github.com/dseeley/blockdevmap + +# Copyright 2017 Amazon.com, Inc. and its affiliates. All Rights Reserved. +# Licensed under the MIT License. See the LICENSE accompanying this file +# for the specific language governing permissions and limitations under +# the License. +# /sbin/ebsnvme-id - https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/nvme-ebs-volumes.html + +from __future__ import (absolute_import, division, print_function) + +__metaclass__ = type + +DOCUMENTATION = ''' +--- +module: blockdevmap +version_added: 1.0.0 +short_description: blockdevmap +description: + - Map the block device name as defined in AWS/GCP (e.g. /dev/sdf) with the volume provided to the OS +authors: + - Dougal Seeley + - Amazon.com Inc. 
+''' + +EXAMPLES = ''' +- name: Get block device map information for GCP + blockdevmap: + cloud_type: gcp + become: yes + register: r__blockdevmap + +- name: Get block device map information for AWS + blockdevmap: + cloud_type: aws + become: yes + register: r__blockdevmap + +- name: Get lsblk device map information + blockdevmap: + cloud_type: lsblk + become: yes + register: r__blockdevmap + +- name: debug blockdevmap + debug: msg={{r__blockdevmap}} +''' + +RETURN = ''' +"device_map": [ + { + "FSTYPE": "ext4", + "MOUNTPOINT": "/media/mysvc", + "NAME": "nvme1n1", + "PARTLABEL": "", + "SERIAL": "vol0c2c47ee4516063e9", + "TYPE": "disk", + "UUID": "c3630dbe-042e-44e5-ac67-54fa1c9e4cd2", + "device_name_cloud": "/dev/sdf", + "device_name_os": "/dev/nvme1n1", + "volume_id": "vol-0c2c47ee4516063e9" + }, + { + "FSTYPE": "", + "MOUNTPOINT": "", + "NAME": "nvme0n1", + "PARTLABEL": "", + "SERIAL": "vol0b05e48d5677db81a", + "TYPE": "disk", + "UUID": "", + "device_name_cloud": "/dev/sda1", + "device_name_os": "/dev/nvme0n1", + "volume_id": "vol-0b05e48d5677db81a" + }, + { + "FSTYPE": "ext4", + "MOUNTPOINT": "/", + "NAME": "nvme0n1p1", + "PARTLABEL": "", + "SERIAL": "", + "TYPE": "part", + "UUID": "96ec7adb-9d94-41c0-96a5-d6992c9d5f20", + "device_name_cloud": "/dev/sda1", + "device_name_os": "/dev/nvme0n1p1", + "volume_id": "vol-0b05e48d5677db81a" + } + +"device_map": [ + { + "FSTYPE": "", + "MOUNTPOINT": "", + "NAME": "xvda", + "PARTLABEL": "", + "SERIAL": "", + "TYPE": "disk", + "UUID": "", + "device_name_cloud": "/dev/sda", + "device_name_os": "/dev/xvda" + }, + { + "FSTYPE": "ext4", + "MOUNTPOINT": "/", + "NAME": "xvda1", + "PARTLABEL": "", + "SERIAL": "", + "TYPE": "part", + "UUID": "96ec7adb-9d94-41c0-96a5-d6992c9d5f20", + "device_name_cloud": "/dev/sda1", + "device_name_os": "/dev/xvda1" + } +''' + +from ctypes import * +from fcntl import ioctl +import subprocess +import sys +import json +import re + +try: + from ansible.module_utils.basic import AnsibleModule + from ansible.errors import AnsibleError + from ansible.utils.display import Display +except: + pass + +try: + from urllib.request import urlopen +except ImportError: + from urllib2 import urlopen + +NVME_ADMIN_IDENTIFY = 0x06 +NVME_IOCTL_ADMIN_CMD = 0xC0484E41 +AMZN_NVME_VID = 0x1D0F +AMZN_NVME_EBS_MN = "Amazon Elastic Block Store" + + +class nvme_admin_command(Structure): + _pack_ = 1 + _fields_ = [("opcode", c_uint8), # op code + ("flags", c_uint8), # fused operation + ("cid", c_uint16), # command id + ("nsid", c_uint32), # namespace id + ("reserved0", c_uint64), + ("mptr", c_uint64), # metadata pointer + ("addr", c_uint64), # data pointer + ("mlen", c_uint32), # metadata length + ("alen", c_uint32), # data length + ("cdw10", c_uint32), + ("cdw11", c_uint32), + ("cdw12", c_uint32), + ("cdw13", c_uint32), + ("cdw14", c_uint32), + ("cdw15", c_uint32), + ("reserved1", c_uint64)] + + +class nvme_identify_controller_amzn_vs(Structure): + _pack_ = 1 + _fields_ = [("bdev", c_char * 32), # block device name + ("reserved0", c_char * (1024 - 32))] + + +class nvme_identify_controller_psd(Structure): + _pack_ = 1 + _fields_ = [("mp", c_uint16), # maximum power + ("reserved0", c_uint16), + ("enlat", c_uint32), # entry latency + ("exlat", c_uint32), # exit latency + ("rrt", c_uint8), # relative read throughput + ("rrl", c_uint8), # relative read latency + ("rwt", c_uint8), # relative write throughput + ("rwl", c_uint8), # relative write latency + ("reserved1", c_char * 16)] + + +class nvme_identify_controller(Structure): + _pack_ = 1 + _fields_ = 
[("vid", c_uint16), # PCI Vendor ID + ("ssvid", c_uint16), # PCI Subsystem Vendor ID + ("sn", c_char * 20), # Serial Number + ("mn", c_char * 40), # Module Number + ("fr", c_char * 8), # Firmware Revision + ("rab", c_uint8), # Recommend Arbitration Burst + ("ieee", c_uint8 * 3), # IEEE OUI Identifier + ("mic", c_uint8), # Multi-Interface Capabilities + ("mdts", c_uint8), # Maximum Data Transfer Size + ("reserved0", c_uint8 * (256 - 78)), + ("oacs", c_uint16), # Optional Admin Command Support + ("acl", c_uint8), # Abort Command Limit + ("aerl", c_uint8), # Asynchronous Event Request Limit + ("frmw", c_uint8), # Firmware Updates + ("lpa", c_uint8), # Log Page Attributes + ("elpe", c_uint8), # Error Log Page Entries + ("npss", c_uint8), # Number of Power States Support + ("avscc", c_uint8), # Admin Vendor Specific Command Configuration + ("reserved1", c_uint8 * (512 - 265)), + ("sqes", c_uint8), # Submission Queue Entry Size + ("cqes", c_uint8), # Completion Queue Entry Size + ("reserved2", c_uint16), + ("nn", c_uint32), # Number of Namespaces + ("oncs", c_uint16), # Optional NVM Command Support + ("fuses", c_uint16), # Fused Operation Support + ("fna", c_uint8), # Format NVM Attributes + ("vwc", c_uint8), # Volatile Write Cache + ("awun", c_uint16), # Atomic Write Unit Normal + ("awupf", c_uint16), # Atomic Write Unit Power Fail + ("nvscc", c_uint8), # NVM Vendor Specific Command Configuration + ("reserved3", c_uint8 * (704 - 531)), + ("reserved4", c_uint8 * (2048 - 704)), + ("psd", nvme_identify_controller_psd * 32), # Power State Descriptor + ("vs", nvme_identify_controller_amzn_vs)] # Vendor Specific + + +class cBlockDevMap(object): + def __init__(self, module, **kwds): + self.module = module + self.device_map = [] + + def get_lsblk(self): + # Get all existing block volumes by key=value, then parse this into a dictionary (which excludes non disk and partition block types, e.g. ram, loop). + lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,MOUNTPOINT,MODEL,PTTYPE,SERIAL,SIZE', '-P', '-b']).decode().rstrip().split('\n') + os_device_names = [dict((map(lambda x: x.strip("\""), sub.split("="))) for sub in dev.split('\" ') if '=' in sub) for dev in lsblk_devices] + os_device_names = [dev for dev in os_device_names if dev['TYPE'] in ['disk', 'part', 'lvm']] + os_device_names.sort(key=lambda k: k['NAME']) + return os_device_names + + +class cLsblkMapper(cBlockDevMap): + def __init__(self, **kwds): + super().__init__(**kwds) + + self.device_map = self.get_lsblk() + for os_device in self.device_map: + os_device.update({"device_name_os": "/dev/" + os_device['NAME'], "device_name_cloud": ""}) + + +class cGCPMapper(cBlockDevMap): + def __init__(self, **kwds): + super().__init__(**kwds) + + self.device_map = self.get_lsblk() + + for os_device in self.device_map: + os_device.update({"device_name_os": "/dev/" + os_device['NAME'], "device_name_cloud": os_device['SERIAL']}) + + +class cAwsMapper(cBlockDevMap): + def __init__(self, **kwds): + super().__init__(**kwds) + # Instance stores (AKA ephemeral volumes) do not appear to have a defined endpoint that maps between the /dev/sd[b-e] defined in the instance creation map, and the OS /dev/nvme[0-26]n1 device. + # For this scenario, we can only return the instance stores in the order that they are defined. Because instance stores do not survive a poweroff and cannot be detached and reattached, the order doesn't matter as much. 
+ instance_store_map = [] + with urlopen("http://169.254.169.254/latest/meta-data/block-device-mapping/") as response__block_device_mapping: + block_device_mappings = response__block_device_mapping.read().decode().split("\n") + for block_device_mappings__ephemeral_id in [dev for dev in block_device_mappings if dev.startswith('ephemeral')]: + with urlopen("http://169.254.169.254/latest/meta-data/block-device-mapping/" + block_device_mappings__ephemeral_id) as response__ephemeral_device: + block_device_mappings__ephemeral_mapped = response__ephemeral_device.read().decode() + instance_store_map.append({'ephemeral_id': block_device_mappings__ephemeral_id, 'ephemeral_map': block_device_mappings__ephemeral_mapped}) + + instance_store_count = 0 + self.device_map = self.get_lsblk() + for os_device in self.device_map: + os_device_path = "/dev/" + os_device['NAME'] + if os_device['NAME'].startswith("nvme"): + try: + dev = cAwsMapper.ebs_nvme_device(os_device_path) + except FileNotFoundError as e: + self.module.fail_json(msg=os_device_path + ": FileNotFoundError" + str(e)) + except TypeError as e: + if instance_store_count < len(instance_store_map): + os_device.update({"device_name_os": os_device_path, "device_name_cloud": '/dev/' + instance_store_map[instance_store_count]['ephemeral_map'], "volume_id": dev.get_volume_id()}) + instance_store_count += 1 + else: + self.module.warn(u"%s is not an EBS device and there is no instance store mapping." % os_device_path) + except OSError as e: + self.module.warn(u"%s is not an nvme device." % os_device_path) + else: + os_device.update({"device_name_os": os_device_path, "device_name_cloud": '/dev/' + dev.get_block_device(stripped=True).rstrip(), "volume_id": dev.get_volume_id()}) + elif os_device['NAME'].startswith("xvd"): + os_device.update({"device_name_os": os_device_path, "device_name_cloud": '/dev/' + re.sub(r'xvd(.*)', r'sd\1', os_device['NAME'])}) + else: + os_device.update({"device_name_os": os_device_path, "device_name_cloud": ""}) + + class ebs_nvme_device(): + def __init__(self, device): + self.device = device + self.ctrl_identify() + + def _nvme_ioctl(self, id_response, id_len): + admin_cmd = nvme_admin_command(opcode=NVME_ADMIN_IDENTIFY, addr=id_response, alen=id_len, cdw10=1) + with open(self.device, "rt") as nvme: + ioctl(nvme, NVME_IOCTL_ADMIN_CMD, admin_cmd) + + def ctrl_identify(self): + self.id_ctrl = nvme_identify_controller() + self._nvme_ioctl(addressof(self.id_ctrl), sizeof(self.id_ctrl)) + if self.id_ctrl.vid != AMZN_NVME_VID or self.id_ctrl.mn.decode().strip() != AMZN_NVME_EBS_MN: + raise TypeError("[ERROR] Not an EBS device: '{0}'".format(self.device)) + + def get_volume_id(self): + vol = self.id_ctrl.sn.decode() + if vol.startswith("vol") and vol[3] != "-": + vol = "vol-" + vol[3:] + return vol + + def get_block_device(self, stripped=False): + device = self.id_ctrl.vs.bdev.decode() + if stripped and device.startswith("/dev/"): + device = device[5:] + return device + + +def main(): + if not (len(sys.argv) > 1 and sys.argv[1] == "console"): + module = AnsibleModule(argument_spec={"cloud_type": {"type": "str", "required": True, "choices": ['aws', 'gcp', 'lsblk']}}, supports_check_mode=True) + else: + # For testing without Ansible (e.g on Windows) + class cDummyAnsibleModule(): + params = {"cloud_type": "aws"} + + def exit_json(self, changed, **kwargs): + print(changed, json.dumps(kwargs, sort_keys=True, indent=4, separators=(',', ': '))) + + def warn(self, msg): + print("[WARNING]: " + msg) + + def fail_json(self, msg): + print("Failed: 
" + msg) + exit(1) + + module = cDummyAnsibleModule() + + if module.params['cloud_type'] == 'aws': + blockdevmap = cAwsMapper(module=module) + elif module.params['cloud_type'] == 'gcp': + blockdevmap = cGCPMapper(module=module) + elif module.params['cloud_type'] == 'lsblk': + blockdevmap = cLsblkMapper(module=module) + else: + module.fail_json(msg="cloud_type not valid :" + module.params['cloud_type']) + + module.exit_json(changed=False, device_map=blockdevmap.device_map) + + +if __name__ == '__main__': + main() diff --git a/_dependencies/library/ebsmap__LICENSE b/_dependencies/library/blockdevmap_LICENSE similarity index 94% rename from _dependencies/library/ebsmap__LICENSE rename to _dependencies/library/blockdevmap_LICENSE index 55138771..7d404386 100644 --- a/_dependencies/library/ebsmap__LICENSE +++ b/_dependencies/library/blockdevmap_LICENSE @@ -1,3 +1,6 @@ +--- +## For the blockdevmap.py script: + BSD 3-Clause License Copyright (c) 2020, Dougal Seeley @@ -30,6 +33,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. --- +## For the parts of blockdevmap.py derived from the /sbin/ebsnvme-id (Amazon.com, Inc) script: + +MIT License Copyright 2017 Amazon.com, Inc. and its affiliates. All Rights Reserved. diff --git a/_dependencies/library/blockdevmap_README.md b/_dependencies/library/blockdevmap_README.md new file mode 100644 index 00000000..7f9b8fb7 --- /dev/null +++ b/_dependencies/library/blockdevmap_README.md @@ -0,0 +1,44 @@ +# blockdevmap +This is an Ansible module that is able to map AWS and GCP device names to the host device names. It returns a dictionary, derived from Linux `lsblk`, (augmented in the case of AWS with results from elsewhere). + +### AWS ++ On AWS 'nitro' instances all EBS mappings are attached to the NVME controller. The nvme mapping is non-deterministic though, so the script uses ioctl commands to query the nvme controller (from a script by Amazon that is present on 'Amazon Linux' machines: `/sbin/ebsnvme-id`. See documentation: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/nvme-ebs-volumes). ++ For non-nitro EBS mapping, the script enumerates the mapping in the alphanumerical order of the disk device names. This is the correct order except for some very old RHEL/Centos AMIs, which are not supported. ++ For ephemeral volume mapping, it uses the http://169.254.169.254/latest/meta-data/block-device-mapping/ endpoint. + +### GCP ++ GCP device names are user-defined, and appear as entries in the `lsblk` _SERIAL_ column, mapped to the `lsblk` _NAME_ column. + +### lsblk ++ The script can be run as plain `lsblk` command, where the cloud provider does not include a mapping, and will return the information as a dictionary. For example, the _bytes_ mapped to the _NAME_ field could be cross-checked against the requested disk size to create a mapping. 
+ + +## Execution +This can be run as an Ansible module (needs root): +```yaml +- name: Get block device map information for GCP + blockdevmap: + cloud_type: gcp + become: yes + register: r__blockdevmap + +- name: Get block device map information for AWS + blockdevmap: + cloud_type: aws + become: yes + register: r__blockdevmap + +- name: Get lsblk device map information + blockdevmap: + cloud_type: lsblk + become: yes + register: r__blockdevmap + +- name: debug blockdevmap + debug: msg={{r__blockdevmap}} +``` + +or from the console: +```bash +python3 ./blockdevmap.py console +``` \ No newline at end of file diff --git a/_dependencies/library/ebsmap.py b/_dependencies/library/ebsmap.py deleted file mode 100644 index 0cb47d6a..00000000 --- a/_dependencies/library/ebsmap.py +++ /dev/null @@ -1,293 +0,0 @@ -# Copyright 2020 Dougal Seeley -# https://github.com/dseeley/ebsmap - -# Copyright 2017 Amazon.com, Inc. and its affiliates. All Rights Reserved. -# Licensed under the MIT License. See the LICENSE accompanying this file -# for the specific language governing permissions and limitations under -# the License. - -from __future__ import (absolute_import, division, print_function) - -__metaclass__ = type - -DOCUMENTATION = ''' ---- -module: ebsmap -version_added: 1.0.0 -short_description: ebsmap -description: - - Map the EBS device name as defined in AWS (e.g. /dev/sdf) with the volume provided to the OS -author: - - Dougal Seeley - - Amazon.com inc. -''' - -EXAMPLES = ''' -- name: Get the nvme map information - ebsmap: - become: yes - register: r__ebsmap - -- name: ebsmap - debug: msg={{ebsmap}} -''' - -RETURN = ''' -"device_map": [ - { - "FSTYPE": "ext4", - "MOUNTPOINT": "/media/mysvc", - "NAME": "nvme1n1", - "PARTLABEL": "", - "SERIAL": "vol0c2c47ee4516063e9", - "TYPE": "disk", - "UUID": "c3630dbe-042e-44e5-ac67-54fa1c9e4cd2", - "device_name_aws": "/dev/sdf", - "device_name_os": "/dev/nvme1n1", - "volume_id": "vol-0c2c47ee4516063e9" - }, - { - "FSTYPE": "", - "MOUNTPOINT": "", - "NAME": "nvme0n1", - "PARTLABEL": "", - "SERIAL": "vol0b05e48d5677db81a", - "TYPE": "disk", - "UUID": "", - "device_name_aws": "/dev/sda1", - "device_name_os": "/dev/nvme0n1", - "volume_id": "vol-0b05e48d5677db81a" - }, - { - "FSTYPE": "ext4", - "MOUNTPOINT": "/", - "NAME": "nvme0n1p1", - "PARTLABEL": "", - "SERIAL": "", - "TYPE": "part", - "UUID": "96ec7adb-9d94-41c0-96a5-d6992c9d5f20", - "device_name_aws": "/dev/sda1", - "device_name_os": "/dev/nvme0n1p1", - "volume_id": "vol-0b05e48d5677db81a" - } - -"device_map": [ - { - "FSTYPE": "", - "MOUNTPOINT": "", - "NAME": "xvda", - "PARTLABEL": "", - "SERIAL": "", - "TYPE": "disk", - "UUID": "", - "device_name_aws": "/dev/sda", - "device_name_os": "/dev/xvda" - }, - { - "FSTYPE": "ext4", - "MOUNTPOINT": "/", - "NAME": "xvda1", - "PARTLABEL": "", - "SERIAL": "", - "TYPE": "part", - "UUID": "96ec7adb-9d94-41c0-96a5-d6992c9d5f20", - "device_name_aws": "/dev/sda1", - "device_name_os": "/dev/xvda1" - } -''' - -from ctypes import * -from fcntl import ioctl -import subprocess -import sys -import json -import re - -try: - from ansible.module_utils.basic import AnsibleModule - from ansible.errors import AnsibleError - from ansible.utils.display import Display -except: - pass - -try: - from urllib.request import urlopen -except ImportError: - from urllib2 import urlopen - -NVME_ADMIN_IDENTIFY = 0x06 -NVME_IOCTL_ADMIN_CMD = 0xC0484E41 -AMZN_NVME_VID = 0x1D0F -AMZN_NVME_EBS_MN = "Amazon Elastic Block Store" - - -class nvme_admin_command(Structure): - _pack_ = 1 - _fields_ = 
[("opcode", c_uint8), # op code - ("flags", c_uint8), # fused operation - ("cid", c_uint16), # command id - ("nsid", c_uint32), # namespace id - ("reserved0", c_uint64), - ("mptr", c_uint64), # metadata pointer - ("addr", c_uint64), # data pointer - ("mlen", c_uint32), # metadata length - ("alen", c_uint32), # data length - ("cdw10", c_uint32), - ("cdw11", c_uint32), - ("cdw12", c_uint32), - ("cdw13", c_uint32), - ("cdw14", c_uint32), - ("cdw15", c_uint32), - ("reserved1", c_uint64)] - - -class nvme_identify_controller_amzn_vs(Structure): - _pack_ = 1 - _fields_ = [("bdev", c_char * 32), # block device name - ("reserved0", c_char * (1024 - 32))] - - -class nvme_identify_controller_psd(Structure): - _pack_ = 1 - _fields_ = [("mp", c_uint16), # maximum power - ("reserved0", c_uint16), - ("enlat", c_uint32), # entry latency - ("exlat", c_uint32), # exit latency - ("rrt", c_uint8), # relative read throughput - ("rrl", c_uint8), # relative read latency - ("rwt", c_uint8), # relative write throughput - ("rwl", c_uint8), # relative write latency - ("reserved1", c_char * 16)] - - -class nvme_identify_controller(Structure): - _pack_ = 1 - _fields_ = [("vid", c_uint16), # PCI Vendor ID - ("ssvid", c_uint16), # PCI Subsystem Vendor ID - ("sn", c_char * 20), # Serial Number - ("mn", c_char * 40), # Module Number - ("fr", c_char * 8), # Firmware Revision - ("rab", c_uint8), # Recommend Arbitration Burst - ("ieee", c_uint8 * 3), # IEEE OUI Identifier - ("mic", c_uint8), # Multi-Interface Capabilities - ("mdts", c_uint8), # Maximum Data Transfer Size - ("reserved0", c_uint8 * (256 - 78)), - ("oacs", c_uint16), # Optional Admin Command Support - ("acl", c_uint8), # Abort Command Limit - ("aerl", c_uint8), # Asynchronous Event Request Limit - ("frmw", c_uint8), # Firmware Updates - ("lpa", c_uint8), # Log Page Attributes - ("elpe", c_uint8), # Error Log Page Entries - ("npss", c_uint8), # Number of Power States Support - ("avscc", c_uint8), # Admin Vendor Specific Command Configuration - ("reserved1", c_uint8 * (512 - 265)), - ("sqes", c_uint8), # Submission Queue Entry Size - ("cqes", c_uint8), # Completion Queue Entry Size - ("reserved2", c_uint16), - ("nn", c_uint32), # Number of Namespaces - ("oncs", c_uint16), # Optional NVM Command Support - ("fuses", c_uint16), # Fused Operation Support - ("fna", c_uint8), # Format NVM Attributes - ("vwc", c_uint8), # Volatile Write Cache - ("awun", c_uint16), # Atomic Write Unit Normal - ("awupf", c_uint16), # Atomic Write Unit Power Fail - ("nvscc", c_uint8), # NVM Vendor Specific Command Configuration - ("reserved3", c_uint8 * (704 - 531)), - ("reserved4", c_uint8 * (2048 - 704)), - ("psd", nvme_identify_controller_psd * 32), # Power State Descriptor - ("vs", nvme_identify_controller_amzn_vs)] # Vendor Specific - - -class ebs_nvme_device: - def __init__(self, device): - self.device = device - self.ctrl_identify() - - def _nvme_ioctl(self, id_response, id_len): - admin_cmd = nvme_admin_command(opcode=NVME_ADMIN_IDENTIFY, addr=id_response, alen=id_len, cdw10=1) - with open(self.device, "rt") as nvme: - ioctl(nvme, NVME_IOCTL_ADMIN_CMD, admin_cmd) - - def ctrl_identify(self): - self.id_ctrl = nvme_identify_controller() - self._nvme_ioctl(addressof(self.id_ctrl), sizeof(self.id_ctrl)) - if self.id_ctrl.vid != AMZN_NVME_VID or self.id_ctrl.mn.decode().strip() != AMZN_NVME_EBS_MN: - raise TypeError("[ERROR] Not an EBS device: '{0}'".format(self.device)) - - def get_volume_id(self): - vol = self.id_ctrl.sn.decode() - if vol.startswith("vol") and vol[3] != "-": - vol = 
"vol-" + vol[3:] - return vol - - def get_block_device(self, stripped=False): - device = self.id_ctrl.vs.bdev.decode() - if stripped and device.startswith("/dev/"): - device = device[5:] - return device - - -def main(): - if not (len(sys.argv) > 1 and sys.argv[1] == "console"): - module = AnsibleModule(argument_spec={}, supports_check_mode=True) - else: - # For testing without Ansible (e.g on Windows) - class cDummyAnsibleModule(): - params = {} - - def exit_json(self, changed, **kwargs): - print(changed, json.dumps(kwargs, sort_keys=True, indent=4, separators=(',', ': '))) - - def warn(self, msg): - print("[WARNING]: " + msg) - - def fail_json(self, msg): - print("Failed: " + msg) - exit(1) - - module = cDummyAnsibleModule() - - # Get all existing block volumes by key=value, then parse this into a dictionary (which excludes non disk and partition block types, e.g. ram, loop). - lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,MOUNTPOINT,MODEL,SERIAL', '-P']).decode().rstrip().split('\n') - os_device_names = [dict((map(lambda x: x.strip("\""), sub.split("="))) for sub in dev.split('\" ') if '=' in sub) for dev in lsblk_devices] - os_device_names = [dev for dev in os_device_names if dev['TYPE'] in ['disk', 'part', 'lvm']] - os_device_names.sort(key=lambda k: k['NAME']) - - # Instance stores (AKA ephemeral volumes) do not appear to have a defined endpoint that maps between the /dev/sd[b-e] defined in the instance creation map, and the OS /dev/nvme[0-26]n1 device. - # For this scenario, we can only return the instance stores in the order that they are defined. Because instance stores do not survive a poweroff and cannot be detached and reattached, the order doesn't matter as much. - instance_store_map = [] - with urlopen("http://169.254.169.254/latest/meta-data/block-device-mapping/") as response__block_device_mapping: - block_device_mappings = response__block_device_mapping.read().decode().split("\n") - for block_device_mappings__ephemeral_id in [dev for dev in block_device_mappings if dev.startswith('ephemeral')]: - with urlopen("http://169.254.169.254/latest/meta-data/block-device-mapping/" + block_device_mappings__ephemeral_id) as response__ephemeral_device: - block_device_mappings__ephemeral_mapped = response__ephemeral_device.read().decode() - instance_store_map.append({'ephemeral_id': block_device_mappings__ephemeral_id, 'ephemeral_map': block_device_mappings__ephemeral_mapped}) - - instance_store_count = 0 - for os_device in os_device_names: - os_device_path = "/dev/" + os_device['NAME'] - if os_device['NAME'].startswith("nvme"): - try: - dev = ebs_nvme_device(os_device_path) - except FileNotFoundError as e: - module.fail_json(msg=os_device_path + ": FileNotFoundError" + str(e)) - except TypeError as e: - if instance_store_count < len(instance_store_map): - os_device.update({"device_name_os": os_device_path, "device_name_aws": '/dev/' + instance_store_map[instance_store_count]['ephemeral_map'], "volume_id": dev.get_volume_id()}) - instance_store_count += 1 - else: - module.warn(u"%s is not an EBS device and there is no instance store mapping." % os_device_path) - except OSError as e: - module.warn(u"%s is not an nvme device." 
% os_device_path) - else: - os_device.update({"device_name_os": os_device_path, "device_name_aws": '/dev/' + dev.get_block_device(stripped=True).rstrip(), "volume_id": dev.get_volume_id()}) - elif os_device['NAME'].startswith("xvd"): - os_device.update({"device_name_os": os_device_path, "device_name_aws": '/dev/' + re.sub(r'xvd(.*)', r'sd\1', os_device['NAME'])}) - else: - os_device.update({"device_name_os": os_device_path, "device_name_aws": ""}) - - module.exit_json(changed=False, device_map=os_device_names) - - -if __name__ == '__main__': - main() diff --git a/_dependencies/library/ebsmap__README.md b/_dependencies/library/ebsmap__README.md deleted file mode 100644 index f38b360c..00000000 --- a/_dependencies/library/ebsmap__README.md +++ /dev/null @@ -1,24 +0,0 @@ -# ebsmap - -This is an Ansible module that is able to map AWS EBS device names (including NVME devices) to the host device names. - -## Credits -The bulk of the heavy lifting is nvme ioctl commands written by AWS for their Amazon Linux AMIs. See: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/nvme-ebs-volumes.html - -## Execution -This can be run as an Ansible module (needs root): -```yaml -- name: Get the nvme map information - ebsmap: - become: yes - register: r__ebsmap - -- name: ebsmap - debug: msg={{ebsmap}} - -``` - -or from the console: -```bash -python3 ./ebsmap.py console -``` \ No newline at end of file diff --git a/cluster_hosts/tasks/get_cluster_hosts_target.yml b/cluster_hosts/tasks/get_cluster_hosts_target.yml index 3eda6a80..4ca31ce6 100644 --- a/cluster_hosts/tasks/get_cluster_hosts_target.yml +++ b/cluster_hosts/tasks/get_cluster_hosts_target.yml @@ -25,7 +25,7 @@ # Dynamically look up VPC ID by name from aws - name: get_cluster_hosts_target | Looking up VPC facts to extract ID ec2_vpc_net_info: - region: "{{ cluster_vars.region }}" + region: "{{ cluster_vars.region }}" aws_access_key: "{{ cluster_vars[buildenv].aws_access_key }}" aws_secret_key: "{{ cluster_vars[buildenv].aws_secret_key }}" filters: @@ -40,11 +40,11 @@ - name: get_cluster_hosts_target/aws | Look up proxy subnet facts ec2_vpc_subnet_info: - region: "{{ cluster_vars.region }}" + region: "{{ cluster_vars.region }}" aws_access_key: "{{ cluster_vars[buildenv].aws_access_key }}" aws_secret_key: "{{ cluster_vars[buildenv].aws_secret_key }}" filters: - vpc-id: "{{ vpc_id }}" + vpc-id: "{{ vpc_id }}" register: r__ec2_vpc_subnet_info delegate_to: localhost run_once: true @@ -103,14 +103,18 @@ - name: get_cluster_hosts_target/gcp | GCP-specific modifications to cluster_hosts_target block: - - name: get_cluster_hosts_target/gcp | Update cluster_hosts_target with rootvol_size + - name: get_cluster_hosts_target/gcp | Update cluster_hosts_target auto_volumes with device_name and initialize_params set_fact: - cluster_hosts_target: | - {% set res = cluster_hosts_target -%} - {%- for host in res -%} - {%- set _dummy = host.update({'rootvol_size': cluster_vars[buildenv].hosttype_vars[host.hosttype].rootvol_size | string}) -%} + cluster_hosts_target: |- + {%- for host in cluster_hosts_target -%} + {%- for vol in host.auto_volumes -%} + {%- if 'device_name' not in vol -%} + {%- set _dummy = vol.update({'device_name': host.hostname + '--' + vol.mountpoint | basename }) -%} + {%- set _dummy = vol.update({'initialize_params': {'disk_name': vol.device_name, 'disk_size_gb': vol.volume_size}}) -%} + {%- endif -%} + {%- endfor %} {%- endfor %} - {{ res }} + {{ cluster_hosts_target }} when: cluster_vars.type == "gcp" - name: get_cluster_hosts_target | 
cluster_hosts_target diff --git a/config/tasks/disks_auto_aws.yml b/config/tasks/disks_auto_aws_gcp.yml similarity index 50% rename from config/tasks/disks_auto_aws.yml rename to config/tasks/disks_auto_aws_gcp.yml index 479982d9..c30e4be9 100644 --- a/config/tasks/disks_auto_aws.yml +++ b/config/tasks/disks_auto_aws_gcp.yml @@ -1,44 +1,49 @@ --- -- name: disks_auto_aws | cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype - debug: msg={{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype] }} +- name: disks_auto_aws_gcp | cluster_hosts_target(inventory_hostname) + debug: msg={{ cluster_hosts_target | json_query(\"[?hostname == `\" + inventory_hostname + \"`] \") }} -- name: disks_auto_aws | Mount volumes as individual disks +- name: disks_auto_aws_gcp | Mount block devices as individual disks block: - - name: disks_auto_aws | Get the nvme information (pre-filesystem create) - ebsmap: + - name: disks_auto_aws_gcp | auto_vols + debug: msg={{ auto_vols }} + + - name: disks_auto_aws_gcp | Get the block device information (pre-filesystem create) + blockdevmap: + cloud_type: "{{cluster_vars.type}}" become: yes - register: r__ebsmap + register: r__blockdevmap - - name: disks_auto_aws | r__ebsmap (pre-filesystem create) - debug: msg={{r__ebsmap}} + - name: disks_auto_aws_gcp | r__blockdevmap (pre-filesystem create) + debug: msg={{r__blockdevmap}} - - name: disks_auto_aws | Create filesystem (partitionless) + - name: disks_auto_aws_gcp | Create filesystem (partitionless) become: yes filesystem: fstype: "{{ item.fstype }}" - dev: "{{ (r__ebsmap.device_map | selectattr('device_name_aws', '==', item.device_name) | list | last)['device_name_os'] }}" + dev: "{{ (r__blockdevmap.device_map | selectattr('device_name_cloud', '==', item.device_name) | list | last)['device_name_os'] }}" loop: "{{auto_vols}}" - - name: disks_auto_aws | Get the nvme information (post-filesystem create), to get the block IDs for mounting - ebsmap: + - name: disks_auto_aws_gcp | Get the block device information (post-filesystem create), to get the block IDs for mounting + blockdevmap: + cloud_type: "{{cluster_vars.type}}" become: yes - register: r__ebsmap + register: r__blockdevmap - - name: disks_auto_aws | r__ebsmap (post-filesystem create) - debug: msg={{r__ebsmap}} + - name: disks_auto_aws_gcp | r__blockdevmap (post-filesystem create) + debug: msg={{r__blockdevmap}} - - name: disks_auto_aws | Mount created filesytem(s) persistently + - name: disks_auto_aws_gcp | Mount created filesytem(s) persistently become: yes mount: path: "{{ item.mountpoint }}" - src: "UUID={{ (r__ebsmap.device_map | selectattr('device_name_aws', '==', item.device_name) | list | last)['UUID'] }}" + src: "UUID={{ (r__blockdevmap.device_map | selectattr('device_name_cloud', '==', item.device_name) | list | last)['UUID'] }}" fstype: "{{ item.fstype }}" state: mounted opts: _netdev loop: "{{auto_vols}}" - - name: disks_auto_aws | change ownership of mountpoint (if set) + - name: disks_auto_aws_gcp | change ownership of mountpoint (if set) become: yes file: path: "{{ item.mountpoint }}" @@ -49,70 +54,70 @@ loop: "{{auto_vols}}" - block: - - name: disks_auto_aws | Touch a file with the mountpoint and device name for testing that disk attachment is correct + - name: disks_auto_aws_gcp | Touch a file with the mountpoint and device name for testing that disk attachment is correct become: yes file: path: "{{item.mountpoint}}/__clusterversetest_{{ item.mountpoint | regex_replace('\/', '_') }}_{{ item.device_name | 
regex_replace('\/', '_') }}" state: touch loop: "{{auto_vols}}" - - name: disks_auto_aws | Find all __clusterversetest_ files in newly mounted disks + - name: disks_auto_aws_gcp | Find all __clusterversetest_ files in newly mounted disks find: paths: "{{item.mountpoint}}" patterns: "__clusterversetest_*" loop: "{{auto_vols}}" register: r__find_test - - name: disks_auto_aws | Display all __clusterversetest_ files in newly mounted disks. + - name: disks_auto_aws_gcp | Display all __clusterversetest_ files in newly mounted disks. debug: msg: "{{ r__find_test | json_query(\"results[].{device_name: item.device_name, mountpoint: item.mountpoint, files: files[].path}\") }}" when: test_touch_disks is defined and test_touch_disks|bool when: (auto_vols | map(attribute='mountpoint') | list | unique | count == auto_vols | map(attribute='mountpoint') | list | count) vars: - auto_vols: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].auto_volumes }}" + auto_vols: "{{ cluster_hosts_target | json_query(\"[?hostname == `\" + inventory_hostname + \"`].auto_volumes[]\") }}" -# The following block mounts all nvme attached volumes that have a single, common mountpoint, by creating a logical volume -- name: disks_auto_aws | Mount nvme volumes in a single mountpoint through LV/VG +# The following block mounts all attached volumes that have a single, common mountpoint, by creating a logical volume +- name: disks_auto_aws_gcp | Mount block devices in a single LVM mountpoint through LV/VG block: - - name: disks_auto_aws | Install logical volume management tooling. (yum - RedHat/CentOS) + - name: disks_auto_aws_gcp | Install logical volume management tooling. (yum - RedHat/CentOS) become: true yum: name: "lvm*" state: present when: ansible_os_family == 'RedHat' - - name: disks_auto_aws | Get the nvme information (pre-filesystem create) - ebsmap: + - name: disks_auto_aws_gcp | Get the device information (pre-filesystem create) + blockdevmap: become: yes - register: r__ebsmap + register: r__blockdevmap - - name: disks_auto_aws | r__ebsmap (pre-filesystem create) - debug: msg={{r__ebsmap}} + - name: disks_auto_aws_gcp | r__blockdevmap (pre-filesystem create) + debug: msg={{r__blockdevmap}} - - name: disks_auto_aws | Create a volume group from all nvme devices + - name: disks_auto_aws_gcp | Create a volume group from all block devices become: yes lvg: vg: "{{ hosttype_vars.lvmparams.vg_name }}" - pvs: "{{ r__ebsmap.device_map | json_query(\"[?device_name_aws && contains('\" + auto_vol_device_names + \"', device_name_aws)].device_name_os\") | join(',')}}" + pvs: "{{ r__blockdevmap.device_map | json_query(\"[?device_name_cloud && contains('\" + auto_vol_device_names + \"', device_name_cloud)].device_name_os\") | join(',')}}" vars: auto_vol_device_names: "{{hosttype_vars.auto_volumes | map(attribute='device_name') | sort | join(',')}}" - - name: disks_auto_aws | Create a logical volume from volume group + - name: disks_auto_aws_gcp | Create a logical volume from volume group become: yes lvol: vg: "{{ hosttype_vars.lvmparams.vg_name }}" lv: "{{ hosttype_vars.lvmparams.lv_name }}" size: "{{ hosttype_vars.lvmparams.lv_size }}" - - name: disks_auto_aws | Create filesystem(s) on attached nvme volume(s) + - name: disks_auto_aws_gcp | Create filesystem(s) on attached volume(s) become: yes filesystem: fstype: "{{ hosttype_vars.auto_volumes[0].fstype }}" dev: "/dev/{{ hosttype_vars.lvmparams.vg_name }}/{{ hosttype_vars.lvmparams.lv_name }}" force: no - - name: disks_auto_aws | Mount created 
filesytem(s) persistently + - name: disks_auto_aws_gcp | Mount created filesytem(s) persistently become: yes mount: path: "{{ hosttype_vars.auto_volumes[0].mountpoint }}" @@ -122,22 +127,22 @@ opts: _netdev - block: - - name: disks_auto_aws | Touch a file with the mountpoint and device name for testing that disk attachment is correct + - name: disks_auto_aws_gcp | Touch a file with the mountpoint and device name for testing that disk attachment is correct become: yes file: path: "{{ hosttype_vars.auto_volumes[0].mountpoint }}/__clusterversetest_{{ hosttype_vars.auto_volumes[0].mountpoint | regex_replace('\/', '_') }}" state: touch - - name: disks_auto_aws | Find all __clusterversetest_ files in newly mounted disks + - name: disks_auto_aws_gcp | Find all __clusterversetest_ files in newly mounted disks find: paths: "{{ hosttype_vars.auto_volumes[0].mountpoint }}" patterns: "__clusterversetest_*" register: r__find_test - - name: disks_auto_aws | Display all __clusterversetest_ files in newly mounted disks. + - name: disks_auto_aws_gcp | Display all __clusterversetest_ files in newly mounted disks. debug: msg: "{{ r__find_test | json_query(\"files[].path\") }}" when: test_touch_disks is defined and test_touch_disks|bool when: ('lvmparams' in hosttype_vars) and (hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | unique | count == 1) and (hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | count >= 2) and (hosttype_vars.auto_volumes | map(attribute='fstype') | list | unique | count == 1) vars: - hosttype_vars: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype] }}" + hosttype_vars: "{{ cluster_hosts_target | json_query(\"[?hostname == `\" + inventory_hostname + \"`]\") }}" diff --git a/config/tasks/disks_auto_generic.yml b/config/tasks/disks_auto_generic.yml index 4c30ee1e..475edfa9 100644 --- a/config/tasks/disks_auto_generic.yml +++ b/config/tasks/disks_auto_generic.yml @@ -1,43 +1,34 @@ --- -#- debug: msg={{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].auto_volumes }} -#- debug: msg={{ ansible_facts.devices }} - -- block: - - name: disks_auto_generic | Get unused block devices - set_fact: - block_devices: "{{ {'dev': item, 'size_b': (ansible_facts.devices[item].sectors|int) * (ansible_facts.devices[item].sectorsize|int)} }}" - with_items: "{{ ansible_facts.devices }}" - register: block_devices_list - when: item | regex_search("nvme|[xvsh]+d") and ansible_facts.devices[item].partitions == {} - - - name: disks_auto_generic | Create unused block devices list - set_fact: - lsblk_volumes: "{{ block_devices_list.results | map(attribute='ansible_facts.block_devices') | select('defined') | list }}" +- name: disks_auto_generic | Get the block device information (pre-filesystem create) + blockdevmap: + cloud_type: "lsblk" + become: yes + register: r__blockdevmap - - name: disks_auto_generic | lsblk_volumes - debug: msg={{ lsblk_volumes }} +- name: disks_auto_generic | r__blockdevmap + debug: msg={{ r__blockdevmap }} - name: disks_auto_generic | Create 'hostvols' fact that contains a list of available host devices (lsblk) mapped to the mountpoints defined in cluster_vars. Allow for multiple disks with same size. 
set_fact: hostvols: | {% set res = [] -%} - {% set tmp_blkvols = lsblk_volumes -%} + {% set tmp_blkvols = r__blockdevmap.device_map | selectattr('PTTYPE', '==', '') | selectattr('FSTYPE', '==', '') | selectattr('TYPE', '==', 'disk') | list -%} {% set inventory_hostname__no_suffix = inventory_hostname | regex_replace('-(?!.*-).*') -%} {%- for autovol in cluster_hosts_target | to_json | from_json | json_query('[?starts_with(hostname, \'' + inventory_hostname__no_suffix + '\')].auto_volumes[]') -%} {%- set blkvolloop = namespace(break=false) -%} {%- for blkvol in tmp_blkvols if not blkvolloop.break -%} - {%- if (autovol.volume_size*1073741824|int) == (blkvol.size_b|int) -%} - {%- set _ = res.extend([ {'device': '/dev/'+blkvol.dev, 'mountpoint': autovol.mountpoint, 'fstype': autovol.fstype, 'perms': autovol.perms | default({})}]) -%} - {%- set _ = tmp_blkvols.remove(blkvol) -%} + {%- if (autovol.volume_size*1073741824|int) == (blkvol['SIZE']|int) -%} + {%- set _ = res.extend([ {'device': blkvol['device_name_os'], 'mountpoint': autovol.mountpoint, 'fstype': autovol.fstype, 'perms': autovol.perms | default({})}]) -%} {%- set blkvolloop.break = true -%} + {%- set _ = tmp_blkvols.remove(blkvol) -%} {%- endif -%} {%- endfor -%} {%- endfor -%} {{ res }} -#- name: disks_auto_generic | hostvols -# debug: msg={{hostvols}} +- name: disks_auto_generic | hostvols + debug: msg={{hostvols}} # Create partition-less filesystems. - name: disks_auto_generic | Create filesystem(s) on attached volume(s) diff --git a/config/tasks/main.yml b/config/tasks/main.yml index 5a23db5e..b99cc683 100644 --- a/config/tasks/main.yml +++ b/config/tasks/main.yml @@ -52,13 +52,13 @@ mode: 0755 when: (static_journal is defined and static_journal|bool) -- name: Create partition table, format and attach volumes - AWS - include_tasks: disks_auto_aws.yml - when: cluster_vars.type == "aws" +- name: Create partition table, format and attach volumes - AWS or GCP + include_tasks: disks_auto_aws_gcp.yml + when: cluster_vars.type == "aws" or cluster_vars.type == "gcp" - name: Create partition table, format and attach volumes - generic include_tasks: disks_auto_generic.yml - when: cluster_vars.type != "aws" + when: cluster_vars.type != "aws" and cluster_vars.type != "gcp" - name: install prometheus node exporter daemon include_tasks: prometheus_node_exporter.yml @@ -68,13 +68,13 @@ include_tasks: filebeat.yml when: (filebeat_install is defined and filebeat_install|bool and (cluster_vars[buildenv].hosttype_vars[hosttype].skip_beat_install is undefined or (cluster_vars[buildenv].hosttype_vars[hosttype].skip_beat_install is defined and not cluster_vars[buildenv].hosttype_vars[hosttype].skip_beat_install|bool))) vars: - hosttype: "{{cluster_hosts_target | json_query('[? hostname == `' + inventory_hostname + '`].hosttype|[0]') }}" + hosttype: "{{cluster_hosts_target | json_query('[?hostname == `' + inventory_hostname + '`].hosttype|[0]') }}" - name: Install elastic metricbeat include_tasks: metricbeat.yml when: (metricbeat_install is defined and metricbeat_install|bool and (cluster_vars[buildenv].hosttype_vars[hosttype].skip_beat_install is undefined or (cluster_vars[buildenv].hosttype_vars[hosttype].skip_beat_install is defined and not cluster_vars[buildenv].hosttype_vars[hosttype].skip_beat_install|bool))) vars: - hosttype: "{{cluster_hosts_target | json_query('[? 
hostname == `' + inventory_hostname + '`].hosttype|[0]') }}" + hosttype: "{{cluster_hosts_target | json_query('[?hostname == `' + inventory_hostname + '`].hosttype|[0]') }}" - name: Install security cloud agent include_tasks: cloud_agents.yml diff --git a/create/tasks/gcp.yml b/create/tasks/gcp.yml index 75db396a..84824773 100644 --- a/create/tasks/gcp.yml +++ b/create/tasks/gcp.yml @@ -68,12 +68,20 @@ with_items: "{{ cluster_vars.firewall_rules }}" -- name: create/gcp | Generate GCE ssh public key from the private key provided on the command line - shell: ssh-keygen -y -f "{{ ansible_ssh_private_key_file }}" - register: r__gcp_ssh_pubkey - - name: create/gcp | Create GCP VMs asynchronously and wait for completion block: + - name: create/gcp | Detach volumes from previous instances (during the _scheme_rmvm_keepdisk_rollback redeploy, we only redeploy one host at a time, and it is already powered off) + gce_pd: + credentials_file: "{{gcp_credentials_file}}" + service_account_email: "{{gcp_credentials_json.client_email}}" + project_id: "{{cluster_vars[buildenv].vpc_project_id}}" + zone: "{{cluster_vars.region}}-{{item.az_name}}" + detach_only : yes + state: deleted + instance_name: "{{ item.auto_volume.src.hostname }}" + name: "{{item.auto_volume.src.source_url | basename}}" + loop: "{{ cluster_hosts_target_denormalised_by_volume | selectattr('auto_volume.src', 'defined') | list }}" + - name: create/gcp | Create GCP VMs asynchronously gcp_compute_instance: auth_kind: "serviceaccount" @@ -82,11 +90,10 @@ zone: "{{cluster_vars.region}}-{{item.az_name}}" name: "{{item.hostname}}" machine_type: "{{item.flavor}}" - disks: "{{_host_disks}}" + disks: "{{ [_bootdisk] + (_autodisks | default([])) }}" metadata: startup-script: "{%- if cluster_vars.ssh_guard_whitelist is defined and cluster_vars.ssh_guard_whitelist | length > 0 -%}#! 
/bin/bash\n\n#Whitelist my inbound IPs\n[ -f /etc/sshguard/whitelist ] && echo \"{{cluster_vars.ssh_guard_whitelist | join ('\n')}}\" >>/etc/sshguard/whitelist && /bin/systemctl restart sshguard{%- endif -%}" - ssh-keys: "{{ cliargs.remote_user }}:{{ r__gcp_ssh_pubkey.stdout }}" - # ssh-keys: "{{ cliargs.remote_user }}:{{ r__gcp_ssh_pubkey.stdout }} {{ cliargs.remote_user }}" + ssh-keys: "{{ cliargs.remote_user }}:{{ lookup('pipe', 'ssh-keygen -y -f ' + ansible_ssh_private_key_file) }} {{ cliargs.remote_user }}" labels: "{{ _labels | combine(cluster_vars.custom_tagslabels | default({})) }}" network_interfaces: - network: "{{ r__gcp_compute_network_info['resources'][0] | default({}) }}" @@ -98,11 +105,10 @@ state: present deletion_protection: "{{cluster_vars[buildenv].deletion_protection}}" vars: - __autodisksnames: "{%- if cluster_vars[buildenv].hosttype_vars[item.hosttype].auto_volumes | length -%}[{%- for vol in cluster_vars[buildenv].hosttype_vars[item.hosttype].auto_volumes -%}{%- set mountname = vol.mountpoint | regex_replace('.*\\/(.*)', '\\\\1') -%}{{vol|combine({'mountname': mountname})}}{% if not loop.last %},{% endif %}{%- endfor -%}]{%- else -%}[]{%- endif-%}" - _autodisks: "{{__autodisksnames | to_json | from_json | json_query(\" [].{auto_delete: auto_delete, interface: interface, device_name: join('',[`\"+item.hostname+\"--`,mountname]), initialize_params: {disk_name: join('',[`\"+item.hostname+\"--`,mountname]), disk_size_gb: volume_size}} \") }}" - _bootdisk: {auto_delete: true, boot: true, device_name: "{{ item.hostname }}--boot", initialize_params: {source_image: "{{cluster_vars.image}}", disk_name: "{{ item.hostname }}--boot", disk_size_gb: "{{item.rootvol_size}}"}} - _host_disks: "{{[_bootdisk] + _autodisks}}" + _bootdisk: {auto_delete: true, boot: true, device_name: "{{ item.hostname }}--boot", initialize_params: {source_image: "{{cluster_vars.image}}", disk_name: "{{ item.hostname }}--boot", disk_size_gb: "{{cluster_vars[buildenv].hosttype_vars[item.hosttype].rootvol_size}}"}} + _autodisks: "{{item.auto_volumes | json_query(\"[].{auto_delete: auto_delete, interface: interface, device_name: device_name, initialize_params: initialize_params, source: {selfLink: src.source_url}}\") }}" _labels: + name: "{{item.hostname}}" inv_node_version: "{{cluster_vars[buildenv].hosttype_vars[item.hosttype].version | default(omit)}}" inv_node_type: "{{item.hosttype}}" hosttype: "{{item.hosttype}}" @@ -112,7 +118,7 @@ maintenance_mode: "true" release: "{{ release_version }}" lifecycle_state: "current" - register: gcp_compute_instance + register: r__gcp_compute_instance with_items: "{{cluster_hosts_target}}" async: 7200 poll: 0 @@ -120,14 +126,115 @@ - name: create/gcp | Wait for GCE instance creation to complete async_status: jid: "{{ item.ansible_job_id }}" - register: gcp_jobs - until: gcp_jobs.finished + register: r__async_status__gcp_compute_instance + until: r__async_status__gcp_compute_instance.finished delay: 3 retries: 300 - with_items: "{{gcp_compute_instance.results}}" + with_items: "{{r__gcp_compute_instance.results}}" + + - name: create/gcp | r__async_status__gcp_compute_instance.results + debug: msg={{r__async_status__gcp_compute_instance.results}} - name: create/gcp | Set a fact containing the newly-created hosts set_fact: cluster_hosts_created: "{{ gcp_jobs.results | json_query(\"[?item.changed==`true`].item.item\") }}" + - name: create/gcp | Label the volumes + gce_labels: + project_id: "{{cluster_vars[buildenv].vpc_project_id}}" + credentials_file: 
"{{gcp_credentials_file}}" + service_account_email: "{{gcp_credentials_json.client_email}}" + resource_url: "{{item.resource_url}}" + labels: "{{ _labels | combine(cluster_vars.custom_tagslabels | default({})) }}" + with_items: "{{_ec2_vols_denormalised_by_device}}" + vars: + _ec2_vols_denormalised_by_device: |- + {% set res = [] -%} + {%- for host_instance in r__async_status__gcp_compute_instance.results -%} + {%- for disk in host_instance.disks -%} + {% set _ = res.append({'hostname': host_instance.name , 'hosttype': host_instance.labels.hosttype, 'device_name': disk.deviceName, 'disk_name': disk.source | basename, 'resource_url': disk.source, 'regionzone': host_instance.invocation.module_args.zone}) -%} + {%- endfor %} + {%- endfor %} + {{ res }} + _labels: + name: "{{ item.device_name }}" + inv_node_version: "{{cluster_vars[buildenv].hosttype_vars[item.hosttype].version | default(omit)}}" + inv_node_type: "{{item.hosttype}}" + owner: "{{ lookup('env','USER') | lower }}" + release: "{{ release_version }}" + +# - name: create/gcp | Attach (or create) volumes where 'src' is present (e.g. inserted as part of _scheme_rmvm_keepdisk_rollback scheme) +# gce_pd: +# credentials_file: "{{gcp_credentials_file}}" +# service_account_email: "{{gcp_credentials_json.client_email}}" +# project_id: "{{cluster_vars[buildenv].vpc_project_id}}" +# zone: "{{cluster_vars.region}}-{{item.az_name}}" +# delete_on_termination: yes +# disk_type : "{{item.auto_volume.volume_type | default(omit)}}" +# instance_name: "{{ item.hostname }}" +# mode: "READ_WRITE" +# name: "{{item.auto_volume.device_name}}" +# size_gb : "{%- if 'src' not in item.auto_volume -%}{{item.auto_volume.volume_size}}{%- endif -%}" +# loop: "{{ cluster_hosts_target_denormalised_by_volume | selectattr('auto_volume.src', 'defined') | list }}" +# async: 7200 +# poll: 0 +# register: r__gce_pd +# +# - name: create/aws | Wait for volume creation/ attachment to complete +# async_status: { jid: "{{ item.ansible_job_id }}" } +# register: r__async_status__gce_pd +# until: r__async_status__gce_pd.finished +# delay: 3 +# retries: 300 +# with_items: "{{r__gce_pd.results}}" +# +# - name: create/gcp | Get existing GCE instance info (per AZ) +# gcp_compute_instance_info: +# zone: "{{cluster_vars.region}}-{{item}}" +# filters: +# - "labels.cluster_name = {{cluster_name}}" +# - "labels.lifecycle_state = current" +# project: "{{cluster_vars[buildenv].vpc_project_id}}" +# auth_kind: "serviceaccount" +# service_account_file: "{{gcp_credentials_file}}" +# scopes: ["https://www.googleapis.com/auth/compute.readonly"] +# with_items: "{{ cluster_vars[buildenv].hosttype_vars | json_query(\"*[vms_by_az][][keys(@)][][]\") | unique }}" +# register: r__gcp_compute_instance_info +# +# - name: create/gcp | r__gcp_compute_instance_info.results +# debug: msg={{r__gcp_compute_instance_info.results}} +# +# - name: create/gcp | Label the volumes +# gce_labels: +# project_id: "{{cluster_vars[buildenv].vpc_project_id}}" +# credentials_file: "{{gcp_credentials_file}}" +# service_account_email: "{{gcp_credentials_json.client_email}}" +# resource_url: "{{item.resource_url}}" +# labels: "{{ _labels | combine(cluster_vars.custom_tagslabels | default({})) }}" +# with_items: "{{_ec2_vols_denormalised_by_device}}" +# vars: +# _ec2_vols_denormalised_by_device: |- +# {% set res = [] -%} +# {%- for zone_result in r__gcp_compute_instance_info.results -%} +# {%- for host_instance in zone_result.resources -%} +# {%- for disk in host_instance.disks -%} +# {% set _ = res.append({'hostname': 
host_instance.name , 'hosttype': host_instance.labels.hosttype, 'device_name': disk.deviceName, 'disk_name': disk.source | basename, 'resource_url': disk.source, 'regionzone': zone_result.invocation.module_args.zone}) -%} +# {%- endfor %} +# {%- endfor %} +# {%- endfor %} +# {{ res }} +# _labels: +# name: "{{ item.device_name }}" +# inv_node_version: "{{cluster_vars[buildenv].hosttype_vars[item.hosttype].version | default(omit)}}" +# inv_node_type: "{{item.hosttype}}" +# owner: "{{ lookup('env','USER') | lower }}" +# release: "{{ release_version }}" +# - name: create/gcp | Label the volumes ## DOES NOT ADD / MODITY LABELS ON EXISTING DISKS (cannot use for attaching and relabelling existing disks) +# gcp_compute_disk: +# auth_kind: "serviceaccount" +# service_account_file: "{{gcp_credentials_file}}" +# project: "{{cluster_vars[buildenv].vpc_project_id}}" +# zone: "{{item.regionzone}}" +# name: "{{item.disk_name}}" +# labels: "{{ _labels | combine(cluster_vars.custom_tagslabels | default({})) }}" diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml index 07db0737..b916e101 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml @@ -18,10 +18,10 @@ cluster_hosts_target: | {%- for cht_host in cluster_hosts_target -%} {%- for cht_autovol in cht_host.auto_volumes -%} - {%- for chs_host_info_result in r__ec2_instance_info.instances | selectattr('tags.lifecycle_state', '!=', 'current') -%} - {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == chs_host_info_result.tags.Name | regex_replace('-(?!.*-).*') -%} - {%- for chs_host_diskinfo in chs_host_info_result.block_device_mappings | selectattr('device_name', '==', cht_autovol.device_name) -%} - {%- set _ = cht_autovol.update({'src': {'instance_id': chs_host_info_result.instance_id, 'device_name': chs_host_diskinfo.device_name, 'volume_id': chs_host_diskinfo.ebs.volume_id }}) -%} + {%- for ec2_instance_info_result in r__ec2_instance_info.instances | selectattr('tags.lifecycle_state', '!=', 'current') -%} + {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == ec2_instance_info_result.tags.Name | regex_replace('-(?!.*-).*') -%} + {%- for chs_host_diskinfo in ec2_instance_info_result.block_device_mappings | selectattr('device_name', '==', cht_autovol.device_name) -%} + {%- set _ = cht_autovol.update({'src': {'instance_id': ec2_instance_info_result.instance_id, 'device_name': chs_host_diskinfo.device_name, 'volume_id': chs_host_diskinfo.ebs.volume_id }}) -%} {%- endfor -%} {%- endif -%} {%- endfor -%} diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__gcp.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__gcp.yml new file mode 100644 index 00000000..5b278a9f --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__gcp.yml @@ -0,0 +1,39 @@ +--- + +- name: _get_diskinfo_gcp | Get existing GCE instance info (per AZ) + gcp_compute_instance_info: + zone: "{{cluster_vars.region}}-{{item}}" + filters: + - "labels.cluster_name = {{cluster_name}}" + project: "{{cluster_vars[buildenv].vpc_project_id}}" + auth_kind: "serviceaccount" + service_account_file: "{{gcp_credentials_file}}" + scopes: 
["https://www.googleapis.com/auth/compute.readonly"] + with_items: "{{ cluster_vars[buildenv].hosttype_vars | json_query(\"*[vms_by_az][][keys(@)][][]\") | unique }}" + register: r__gcp_compute_instance_info + +- name: _get_diskinfo_gcp | r__gcp_compute_instance_info.results + debug: msg={{r__gcp_compute_instance_info.results}} + +- name: _get_diskinfo_gcp | augment/update cluster_hosts_target auto_volumes with source disk info + set_fact: + cluster_hosts_target: | + {%- for cht_host in cluster_hosts_target -%} + {%- for cht_autovol in cht_host.auto_volumes -%} + {%- for gcp_compute_instance_result in r__gcp_compute_instance_info.results | json_query('[].resources[?labels.lifecycle_state != "current"][]') -%} + {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == gcp_compute_instance_result.name | regex_replace('-(?!.*-).*') -%} + {%- for gcp_compute_instance_diskinfo in gcp_compute_instance_result.disks -%} + {%- if cht_autovol.initialize_params.disk_name | regex_replace('(.*)-.*(--.*)', '\\1\\2') == gcp_compute_instance_diskinfo.source | basename | regex_replace('(.*)-.*(--.*)', '\\1\\2') -%} + {%- set _ = cht_autovol.update({'device_name': gcp_compute_instance_diskinfo.source | basename}) -%} + {%- set _ = cht_autovol.update({'src': {'hostname': gcp_compute_instance_result.name, 'device_name': cht_autovol.device_name, 'source_url': gcp_compute_instance_diskinfo.source }}) -%} + {%- set _ = cht_autovol.update({'initialize_params': {'disk_name': cht_autovol.device_name, 'disk_size_gb': gcp_compute_instance_diskinfo.diskSizeGb}}) -%} + {%- endif -%} + {%- endfor -%} + {%- endif -%} + {%- endfor -%} + {%- endfor -%} + {%- endfor -%} + {{cluster_hosts_target}} + +- name: _get_diskinfo_gcp | cluster_hosts_target + debug: msg={{cluster_hosts_target}} diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml index dbc99a0a..e7091c8d 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml @@ -3,7 +3,7 @@ - name: Preflight check block: - block: - - assert: { that: "cluster_vars.type != 'gcp'", fail_msg: "This scheme is not supported on GCP." } +# - assert: { that: "cluster_vars.type != 'gcp'", fail_msg: "This scheme is not supported on GCP." } - name: Preflight check | get ec2_instance_info for current disk information ec2_instance_info: From c56e0a824ba910bee8693be0a6d9d8028404c405 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Sun, 25 Oct 2020 14:21:32 +0000 Subject: [PATCH 21/58] Fix for _scheme_addnewvm_rmdisk_rollback when using myhosttypes --- .../_scheme_addnewvm_rmdisk_rollback/tasks/main.yml | 8 ++++---- .../tasks/preflight.yml | 4 +--- redeploy/_scheme_rmvm_rmdisk_only/tasks/main.yml | 12 +++++++++--- redeploy/tasks/main.yml | 4 ++-- 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml index 2a71a918..d3c7b9d3 100644 --- a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml +++ b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml @@ -5,7 +5,7 @@ - assert: { that: "non_current_hosts | length == 0", msg: "ERROR - All VMs must be in the 'current' lifecycle_state. 
Those not [{{non_current_hosts | join(',')}}]" } vars: non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" - when: canary=="start" or canary=="none" + when: (canary=="start" or canary=="none") and (myhosttypes is not defined or myhosttypes=='') - name: Redeploy by hosttype; rollback on fail @@ -39,14 +39,14 @@ name: "{{predeleterole}}" when: predeleterole is defined and predeleterole != "" vars: - hosts_to_remove: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring' && (contains('RUNNING,running', instance_state))]\") }}" + hosts_to_remove: "{{ hosts_to_stop | json_query(\"[?contains('RUNNING,running', instance_state)]\") }}" - name: Power off any other retiring VM(s) that might exist if we're redeploying to a smaller topology. include_role: name: clusterverse/redeploy/__common tasks_from: poweroff_vms.yml - vars: - hosts_to_stop: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}" + vars: + hosts_to_stop: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring' && ('\"+ myhosttypes|default('') + \"' == '' || contains('\"+ myhosttypes|default('') + \"', tagslabels.hosttype))]\") }}" when: (canary=="finish" or canary=="none") - name: re-acquire cluster_hosts_target and cluster_hosts_state (for tidy - can't be in the tidy block because the block depends on this info being correct) diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml index e7091c8d..251e7cf9 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml @@ -3,8 +3,6 @@ - name: Preflight check block: - block: -# - assert: { that: "cluster_vars.type != 'gcp'", fail_msg: "This scheme is not supported on GCP." } - - name: Preflight check | get ec2_instance_info for current disk information ec2_instance_info: filters: { "instance-state-name": [ "running", "stopped" ], "tag:cluster_name": "{{cluster_name}}", "tag:lifecycle_state": "current" } @@ -34,6 +32,6 @@ - assert: { that: "non_current_hosts | length == 0", fail_msg: "ERROR - All VMs must be in the 'current' lifecycle_state. Those not [{{non_current_hosts | join(',')}}]" } vars: { non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" } - when: (canary=="start" or canary=="none") + when: (canary=="start" or canary=="none") and (myhosttypes is not defined or myhosttypes=='') - assert: { that: "(cluster_hosts_state | selectattr('tagslabels.lifecycle_state', '==', 'current') | list | length) == (cluster_hosts_target | length)", fail_msg: "Cannot use this scheme to redeploy to a different-sized cluster" } diff --git a/redeploy/_scheme_rmvm_rmdisk_only/tasks/main.yml b/redeploy/_scheme_rmvm_rmdisk_only/tasks/main.yml index c8f38c96..75e78f71 100644 --- a/redeploy/_scheme_rmvm_rmdisk_only/tasks/main.yml +++ b/redeploy/_scheme_rmvm_rmdisk_only/tasks/main.yml @@ -2,13 +2,19 @@ - name: Preflight check block: - - assert: - that: "{{chs_hosts | difference(chf_hosts) | length==0}}" - msg: "Cannot use this scheme to redeploy to smaller cluster; [{{ chs_hosts | join(',') }}] > [{{ chf_hosts | join(',') }}]" + - assert: { that: "non_current_hosts | length == 0", msg: "ERROR - All VMs must be in the 'current' lifecycle_state. 
Those not [{{non_current_hosts | join(',')}}]" } + vars: + non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" + when: canary=="start" or canary=="none" + + - assert: { that: "{{chs_hosts | difference(chf_hosts) | length==0}}", fail_msg: "Cannot use this scheme to redeploy to smaller cluster; [{{ chs_hosts | join(',') }}] > [{{ chf_hosts | join(',') }}]" } vars: chf_hosts: "{{ cluster_hosts_target | json_query(\"[].hostname\") | map('regex_replace', '-(?!.*-).*') | list }}" chs_hosts: "{{ cluster_hosts_state | json_query(\"[].name\") | map('regex_replace', '-(?!.*-).*') | list }}" + - assert: { that: "canary != 'tidy'", fail_msg: "'tidy' is not valid for this redeploy scheme" } + + - name: Run redeploy per hosttype. Delete one at a time, then reprovision. include_tasks: by_hosttype.yml with_items: "{{ myhosttypes_array }}" diff --git a/redeploy/tasks/main.yml b/redeploy/tasks/main.yml index e4788e94..56153831 100644 --- a/redeploy/tasks/main.yml +++ b/redeploy/tasks/main.yml @@ -3,14 +3,14 @@ - name: Preflight check - Redeploy block: - assert: { that: "clean is not defined", msg: "Must not set the 'clean' variable for a redeploy" } - - assert: { that: "canary is defined and (canary is defined and canary in ['start', 'finish', 'none', 'tidy', 'revert'])", msg: "Canary must be 'start', 'finish', 'none', 'tidy' or 'revert'" } + - assert: { that: "canary is defined and (canary is defined and canary in ['start', 'finish', 'none', 'tidy'])", msg: "Canary must be 'start', 'finish', 'none' or 'tidy'" } - assert: { that: "redeploy_scheme is defined and redeploy_scheme in redeploy_schemes_supported" } - assert: { that: "cluster_hosts_state | length", msg: "Redeploy only possible with an existing cluster." } - name: "Run the {{redeploy_scheme}} redploy scheme" include_role: name: "{{role_path}}/{{redeploy_scheme}}" - when: redeploy_scheme is defined + when: redeploy_scheme is defined - name: Get the final dynamic inventory (to write out current) From 86bcf6c5eba8e04b16a57df3196e1d0f4b6d2356 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Sun, 25 Oct 2020 22:09:05 +0000 Subject: [PATCH 22/58] Rename _scheme_rmvm_keepdisk_only__copy_or_move variable to _scheme_rmvm_keepdisk_rollback__copy_or_move --- .../_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml | 4 ++-- redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml index ee89196e..d8e791e2 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml @@ -15,7 +15,7 @@ # debug: msg={{r__vmware_guest_disk_info}} - assert: { that: "r__vmware_guest_disk_info | json_query(\"results[].guest_disk_info.*[] | [?backing_datastore!='\" + cluster_vars.datastore + \"']\") | length == 0", msg: "Move is only possible if disks are on the same datastore." 
} - when: _scheme_rmvm_keepdisk_only__copy_or_move == "move" + when: _scheme_rmvm_keepdisk_rollback__copy_or_move == "move" - name: _get_diskinfo_esxifree | augment cluster_hosts_target auto_volumes with source disk info set_fact: @@ -25,7 +25,7 @@ {%- for chs_host_info_result in r__vmware_guest_disk_info.results -%} {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == chs_host_info_result.item.name | regex_replace('-(?!.*-).*') -%} {%- for chs_host_diskinfo in chs_host_info_result.guest_disk_info | to_json | from_json | json_query('*| [?unit_number!=`0` && backing_type==\'FlatVer2\' && contains(backing_filename, \'--' + cht_autovol.volname + '.vmdk\')]') -%} - {%- set _ = cht_autovol.update({'volume_size': (chs_host_diskinfo.capacity_in_bytes/1073741824)|int, 'src': {'backing_filename': chs_host_diskinfo.backing_filename, 'copy_or_move': _scheme_rmvm_keepdisk_only__copy_or_move }}) -%} + {%- set _ = cht_autovol.update({'volume_size': (chs_host_diskinfo.capacity_in_bytes/1073741824)|int, 'src': {'backing_filename': chs_host_diskinfo.backing_filename, 'copy_or_move': _scheme_rmvm_keepdisk_rollback__copy_or_move }}) -%} {%- endfor -%} {%- endif -%} {%- endfor -%} diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml index fc802d5b..4a00f6fd 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml @@ -30,7 +30,7 @@ {{ testloop.is_not_subset }} when: cluster_vars.type == "aws" - - assert: { that: "_scheme_rmvm_keepdisk_only__copy_or_move is defined and _scheme_rmvm_keepdisk_only__copy_or_move in ['copy', 'move']", fail_msg: "ERROR - _scheme_rmvm_keepdisk_only__copy_or_move must be defined and set to either 'copy' or 'move'" } + - assert: { that: "_scheme_rmvm_keepdisk_rollback__copy_or_move is defined and _scheme_rmvm_keepdisk_rollback__copy_or_move in ['copy', 'move']", fail_msg: "ERROR - _scheme_rmvm_keepdisk_rollback__copy_or_move must be defined and set to either 'copy' or 'move'" } when: cluster_vars.type == "esxifree" - assert: { that: "non_current_hosts | length == 0", fail_msg: "ERROR - All VMs must be in the 'current' lifecycle_state. Those not [{{non_current_hosts | join(',')}}]" } From 0a3a0c845d656de96e6a9a5ce197a3f3fed48a50 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Sat, 22 Aug 2020 12:52:56 +0100 Subject: [PATCH 23/58] Add functionality to support free ESXI, (using https://github.com/dseeley/esxifree_guest). Add redeploy scheme (_scheme_rmvm_keepdisk_only), which supports copying or moving the disks from previous cluster member to new member (only support esxi-free to date). 
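Throughout this redeploy scheme (and the AWS/GCP variants above), a new VM is paired with the member it replaces by stripping the trailing cluster_suffix from both hostnames with the Jinja filter regex_replace('-(?!.*-).*') and comparing what remains. A minimal Python sketch of that pairing logic; the hostname shape '<app>-<hosttype>-<az><idx>-<cluster_suffix>' is an assumption for illustration:

    import re

    def strip_cluster_suffix(name):
        # Drop everything from the last hyphen onwards (the trailing cluster_suffix),
        # mirroring the Jinja filter regex_replace('-(?!.*-).*') used when pairing an
        # old VM (and its disks) with the replacement host of the same root name.
        return re.sub(r'-(?!.*-).*', '', name)

    # Assumed example hostnames: same root name, different cluster_suffix.
    old, new = "test-sys-a0-1603549084", "test-sys-a0-1603554437"
    assert strip_cluster_suffix(old) == strip_cluster_suffix(new) == "test-sys-a0"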
--- EXAMPLE/Pipfile | 1 + EXAMPLE/cluster.yml | 15 ++-- EXAMPLE/group_vars/_skel/cluster_vars.yml | 40 +++++++++++ .../group_vars/test_aws_euw1/cluster_vars.yml | 3 + .../group_vars/test_gcp_euw1/cluster_vars.yml | 5 +- clean/tasks/clean_vms.yml | 24 +++++++ .../tasks/get_cluster_hosts_state.yml | 31 +++++---- .../tasks/get_cluster_hosts_target.yml | 13 ++++ config/tasks/disks_auto.yml | 3 +- create/tasks/esxifree.yml | 39 +++++++++++ dynamic_inventory/tasks/esxifree.yml | 45 ++++++++++++ dynamic_inventory/tasks/main.yml | 4 +- redeploy/__common/tasks/poweroff_vms.yml | 24 +++++++ redeploy/__common/tasks/poweron_vms.yml | 12 ++++ .../tasks/set_lifecycle_state_label.yml | 12 ++++ .../tasks/_add_diskinfo_esxifree.yml | 36 ++++++++++ .../tasks/by_hosttype.yml | 23 +++++++ .../tasks/by_hosttype_by_host.yml | 51 ++++++++++++++ .../_scheme_rmvm_keepdisk_only/tasks/main.yml | 69 +++++++++++++++++++ redeploy/tasks/main.yml | 2 +- 20 files changed, 426 insertions(+), 26 deletions(-) create mode 100644 create/tasks/esxifree.yml create mode 100644 dynamic_inventory/tasks/esxifree.yml create mode 100644 redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_diskinfo_esxifree.yml create mode 100644 redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype.yml create mode 100644 redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype_by_host.yml create mode 100644 redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml diff --git a/EXAMPLE/Pipfile b/EXAMPLE/Pipfile index bed02bb4..8a16c139 100644 --- a/EXAMPLE/Pipfile +++ b/EXAMPLE/Pipfile @@ -14,6 +14,7 @@ jmespath = "*" dnspython = "*" google-auth = "*" google-api-python-client = "*" +paramiko = "*" [dev-packages] diff --git a/EXAMPLE/cluster.yml b/EXAMPLE/cluster.yml index 5df25bd9..1cbe7eb1 100644 --- a/EXAMPLE/cluster.yml +++ b/EXAMPLE/cluster.yml @@ -3,28 +3,25 @@ - name: Deploy the cluster hosts: localhost connection: local - gather_facts: no tasks: - - { name: "Get dependent roles via ansible-galaxy", local_action: "command ansible-galaxy install -fr requirements.yml", tags: ["always"] } - - - { include_role: { name: "clusterverse/clean", apply: { tags: ["clusterverse_clean"]} }, tags: ["clusterverse_clean"], when: "clean is defined" } - - { include_role: { name: "clusterverse/create", apply: { tags: ["clusterverse_create"]} }, tags: ["clusterverse_create"] } - - { include_role: { name: "clusterverse/dynamic_inventory", apply: { tags: ["clusterverse_dynamic_inventory"]} }, tags: ["clusterverse_dynamic_inventory"] } + - { import_role: { name: clusterverse/clean }, tags: [clusterverse_clean], when: clean is defined } # Alternative include_role (need to force the tags): - { include_role: { name: clusterverse/clean, apply: {tags: [clusterverse_clean]}}, tags: [clusterverse_clean], when: clean is defined } + - { import_role: { name: clusterverse/create }, tags: [clusterverse_create] } + - { import_role: { name: clusterverse/dynamic_inventory }, tags: [clusterverse_dynamic_inventory] } - name: Configure the cluster hosts: all tasks: - - { include_role: { name: "clusterverse/config", apply: { tags: ["clusterverse_config"]} }, tags: ["clusterverse_config"] } + - { import_role: { name: clusterverse/config }, tags: [clusterverse_config] } ## Application roles - name: Application roles hosts: all tasks: - - { include_role: { name: "testrole", apply: { tags: ["testrole"]} }, tags: ["testrole"] } + - { import_role: { name: "testrole" }, tags: [testrole] } ## - name: Perform cluster readiness operations hosts: localhost connection: local tasks: - - { include_role: { name: 
"clusterverse/readiness", apply: { tags: ["clusterverse_readiness"]} }, tags: ["clusterverse_readiness"] } + - { import_role: { name: clusterverse/readiness }, tags: [clusterverse_readiness] } diff --git a/EXAMPLE/group_vars/_skel/cluster_vars.yml b/EXAMPLE/group_vars/_skel/cluster_vars.yml index 5f2029f9..96bafb53 100644 --- a/EXAMPLE/group_vars/_skel/cluster_vars.yml +++ b/EXAMPLE/group_vars/_skel/cluster_vars.yml @@ -1,5 +1,7 @@ --- +redeploy_schemes_supported: [] + # GCP credentials gcp_credentials_file: "{{ lookup('env','GCP_CREDENTIALS') | default('/dev/null', true) }}" gcp_credentials_json: "{{ lookup('file', gcp_credentials_file) | default({'project_id': 'GCP_CREDENTIALS__NOT_SET','client_email': 'GCP_CREDENTIALS__NOT_SET'}, true) }}" @@ -130,3 +132,41 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within #_region: *region #_ssh_guard_whitelist: *ssh_guard_whitelist #_dns_nameserver_zone: *dns_nameserver_zone + +### ESXi-free example +#cluster_vars: +# type: &cloud_type "esxifree" +# image: "gold-ubuntu2004-20200411145623" +# esxi_ip: "10.189.132.4" +# username: "svc" +# password: "" +# dns_cloud_internal_domain: "" # The cloud-internal zone as defined by the cloud provider (e.g. GCP, AWS) +# dns_nameserver_zone: &dns_nameserver_zone "" # The zone that dns_server will operate on. gcloud dns needs a trailing '.'. Leave blank if no external DNS (use IPs only) +# dns_user_domain: "{%- if _dns_nameserver_zone -%}{{_dns_nameserver_zone}}{%- endif -%}" +# dns_server: "" # Specify DNS server. nsupdate, route53 or clouddns. If empty string is specified, no DNS will be added. +# custom_tagslabels: +# inv_environment_id: "{{buildenv}}" +# inv_service_id: "{{app_class}}" +# inv_cluster_id: "{{cluster_name}}" +# inv_cluster_type: "{{app_name}}" +# datastore: "datastore1" +# hardware_version: "15" +# cloudinit_userdata: +# - name: user1 +# groups: "admin" +# lock_passwd: true +# ssh_authorized_keys: ['ssh-rsa AAAzaC1yc2EAAAADAQ...dojtl6mzVnSL29LQ=='] +# passwd: $6$j322wezy...m2RrkJPfghBZMN1O/ +# sandbox: +# networks: +# - networkName: "VM Network" +# virtualDev: vmxnet3 +# #macAddress: "00:0c:29:be:51:d0" #dev01 +# cloudinit_netplan: +# ethernets: {eth0: { dhcp4: true }, nameservers: { addresses: ["8.8.8.8", "8.8.4.4"] } } +# hosttype_vars: +# sys: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: {num_cpus: "2", memory_mb: "2048"}, version: "{{sys_version | default('')}}", auto_volumes: []} +# #sysdisks: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: {num_cpus: "2", memory_mb: "2048"}, version: "{{sys_version | default('')}}", auto_volumes: [{mountpoint: "/media/mysvc", volume_size: 2, provisioning_type: "thin", fstype: "ext4"}]} +#_cloud_type: *cloud_type +#_dns_nameserver_zone: *dns_nameserver_zone +# \ No newline at end of file diff --git a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml index e262bf7d..93c9bc10 100644 --- a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml @@ -1,8 +1,11 @@ --- +redeploy_schemes_supported: ['_scheme_addallnew_rmdisk_rollback', '_scheme_addnewvm_rmdisk_rollback', '_scheme_rmvm_rmdisk_only'] + #redeploy_scheme: _scheme_addallnew_rmdisk_rollback #redeploy_scheme: _scheme_addnewvm_rmdisk_rollback #redeploy_scheme: _scheme_rmvm_rmdisk_only +#redeploy_scheme: _scheme_rmvm_keepdisk_only app_name: "test" # The name of the application cluster (e.g. 'couchbase', 'nginx'); becomes part of cluster_name. app_class: "test" # The class of application (e.g. 
'database', 'webserver'); becomes part of the fqdn diff --git a/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml index 4b08de80..eb10848f 100644 --- a/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml @@ -4,9 +4,12 @@ gcp_credentials_file: "{{ lookup('env','GCP_CREDENTIALS') | default('/dev/null', true) }}" gcp_credentials_json: "{{ lookup('file', gcp_credentials_file) | default({'project_id': 'GCP_CREDENTIALS__NOT_SET','client_email': 'GCP_CREDENTIALS__NOT_SET'}, true) }}" -#redeploy_scheme: _scheme_addallnew_rmdisk_rollback +redeploy_schemes_supported: ['_scheme_addallnew_rmdisk_rollback', '_scheme_addnewvm_rmdisk_rollback', '_scheme_rmvm_rmdisk_only'] + +redeploy_scheme: _scheme_addallnew_rmdisk_rollback #redeploy_scheme: _scheme_addnewvm_rmdisk_rollback #redeploy_scheme: _scheme_rmvm_rmdisk_only +#redeploy_scheme: _scheme_rmvm_keepdisk_only app_name: "test" # The name of the application cluster (e.g. 'couchbase', 'nginx'); becomes part of cluster_name. app_class: "test" # The class of application (e.g. 'database', 'webserver'); becomes part of the fqdn diff --git a/clean/tasks/clean_vms.yml b/clean/tasks/clean_vms.yml index 8bc68497..aa94f1ce 100644 --- a/clean/tasks/clean_vms.yml +++ b/clean/tasks/clean_vms.yml @@ -62,4 +62,28 @@ retries: 300 with_items: "{{r__gcp_compute_instance.results}}" when: cluster_vars.type == "gcp" + + - block: + - name: clean/del_vms/esxifree | Delete vmware VM + esxifree_guest: + hostname: "{{ cluster_vars.esxi_ip }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + name: "{{item.name}}" + state: absent + register: esxi_instances + run_once: true + with_items: "{{hosts_to_clean}}" + async: 7200 + poll: 0 + + - name: clean_vms_esxifree | Wait for esxifree VM deletion to complete + async_status: + jid: "{{ item.ansible_job_id }}" + register: esxi_jobs + until: esxi_jobs.finished + retries: 300 + with_items: "{{esxi_instances.results}}" + when: cluster_vars.type == "esxifree" + when: hosts_to_clean | length \ No newline at end of file diff --git a/cluster_hosts/tasks/get_cluster_hosts_state.yml b/cluster_hosts/tasks/get_cluster_hosts_state.yml index 3571111a..7d50dafa 100644 --- a/cluster_hosts/tasks/get_cluster_hosts_state.yml +++ b/cluster_hosts/tasks/get_cluster_hosts_state.yml @@ -47,22 +47,22 @@ _cluster_hosts_state__urlregion: "{{r__gcp_compute_instance_info.results | json_query(\"[?resources[?labels]].resources[].{name: name, regionzone: zone, tagslabels: labels, instance_id: id, instance_state: status}\") }}" when: cluster_vars.type == "gcp" -- name: get_cluster_hosts_state/vmware | Get VMware cluster_hosts_state +- name: get_cluster_hosts_state_esxifree | Get VMware cluster_hosts_state block: - - name: get_cluster_hosts_state/vmware | Get existing VMware instance info + - name: get_cluster_hosts_state_esxifree | Get existing VMware instance info vmware_vm_info: - username: "{{ cluster_vars.esxi_username }}" - password: "{{ cluster_vars.esxi_password }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" hostname: "{{ cluster_vars.esxi_ip }}" validate_certs: no register: r__vmware_vm_info delegate_to: localhost run_once: true - - name: get_cluster_hosts_state/vmware | Get existing VMware instance facts + - name: get_cluster_hosts_state_esxifree | Get existing VMware instance facts vmware_guest_info: - username: "{{ cluster_vars.esxi_username }}" - password: "{{ 
cluster_vars.esxi_password }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" hostname: "{{ cluster_vars.esxi_ip }}" validate_certs: no datacenter: None @@ -72,21 +72,28 @@ delegate_to: localhost run_once: true - # Convert the annotations into a proper dictionary within the facts - - name: get_cluster_hosts_state/vmware | update r__vmware_guest_info result with json-parsed annotations + ## esxifree hosts must use the esxi 'annotations' field as json. They are stored as unconventional text in the vmx file, so must + ## be converted into inline-json within the facts. If the annotation field is not convertible to json, then we don't consider this VM part of the cluster. + - name: get_cluster_hosts_state_esxifree | update r__vmware_guest_info result with json-parsed annotations set_fact: r__vmware_guest_info: | - {% set res = r__vmware_guest_info -%} + {% set res = {'results': []} -%} {%- for result in r__vmware_guest_info.results -%} - {%- set _ = result.instance.update({'annotation': result.instance.annotation | json_loads_loose}) -%} + {%- set loadloose_res = result.instance.annotation | json_loads_loose -%} + {%- if loadloose_res | type_debug == 'dict' or loadloose_res | type_debug == 'list' -%} + {%- set _ = result.instance.update({'annotation': loadloose_res}) -%} + {%- set _ = res.results.append(result) -%} + {%- endif -%} {%- endfor -%} {{ res }} - - name: get_cluster_hosts_state/vmware | Set cluster_hosts_state + - name: get_cluster_hosts_state_esxifree | Set cluster_hosts_state set_fact: cluster_hosts_state: "{{ r__vmware_guest_info.results | json_query(\"[].{name: instance.hw_name, regionzone: None, tagslabels: instance.annotation, instance_id: instance.moid, instance_state: instance.hw_power_status}\") }}" + when: cluster_vars.type == "esxifree" + - name: get_cluster_hosts_state | cluster_hosts_state debug: msg="{{cluster_hosts_state}}" delegate_to: localhost diff --git a/cluster_hosts/tasks/get_cluster_hosts_target.yml b/cluster_hosts/tasks/get_cluster_hosts_target.yml index 3eda6a80..4dad737e 100644 --- a/cluster_hosts/tasks/get_cluster_hosts_target.yml +++ b/cluster_hosts/tasks/get_cluster_hosts_target.yml @@ -113,6 +113,19 @@ {{ res }} when: cluster_vars.type == "gcp" +- name: get_cluster_hosts_target/esxifree | esxifree-specific modifications to cluster_hosts_target + block: + - name: get_cluster_hosts_target/esxifree | Update cluster_hosts_target with volname (derived from the mountpoint) + set_fact: + cluster_hosts_target: | + {%- for host in cluster_hosts_target -%} + {%- for hostvol in host.auto_volumes -%} + {%- set _dummy = hostvol.update({'volname': hostvol.mountpoint | regex_replace('.*\/(.*)', '\\1')}) -%} + {%- endfor %} + {%- endfor %} + {{ cluster_hosts_target }} + when: cluster_vars.type == "esxifree" + - name: get_cluster_hosts_target | cluster_hosts_target debug: msg={{cluster_hosts_target}} delegate_to: localhost diff --git a/config/tasks/disks_auto.yml b/config/tasks/disks_auto.yml index 4ce7db03..ad3715c4 100644 --- a/config/tasks/disks_auto.yml +++ b/config/tasks/disks_auto.yml @@ -21,7 +21,8 @@ hostvols: | {% set res = [] -%} {% set tmp_blkvols = lsblk_volumes -%} - {%- for autovol in cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].auto_volumes -%} + {% set inventory_hostname__no_suffix = inventory_hostname | regex_replace('-(?!.*-).*') -%} + {%- for autovol in cluster_hosts_target | to_json | from_json | json_query('[?starts_with(hostname, \'' + inventory_hostname__no_suffix + 
'\')].auto_volumes[]') -%} {%- set blkvolloop = namespace(break=false) -%} {%- for blkvol in tmp_blkvols if not blkvolloop.break -%} {%- if (autovol.volume_size*1073741824|int) == (blkvol.size_b|int) -%} diff --git a/create/tasks/esxifree.yml b/create/tasks/esxifree.yml new file mode 100644 index 00000000..4b6f2f4e --- /dev/null +++ b/create/tasks/esxifree.yml @@ -0,0 +1,39 @@ +--- + +- name: Create vmware instances from template + esxifree_guest: + hostname: "{{ cluster_vars.esxi_ip }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + datastore: "{{ cluster_vars.datastore }}" + template: "{{ cluster_vars.image }}" + name: "{{ item.hostname }}" + state: present + hardware: "{{ {'version': cluster_vars.hardware_version} | combine({'num_cpus': item.flavor['num_cpus'], 'memory_mb': item.flavor['memory_mb']}) }}" + annotation: + Name: "{{item.hostname}}" + hosttype: "{{item.hosttype}}" + env: "{{buildenv}}" + cluster_name: "{{cluster_name}}" + owner: "{{lookup('env','USER')}}" + cluster_suffix: "{{cluster_suffix}}" + lifecycle_state: "current" + cloudinit_userdata: "{{ cluster_vars.cloudinit_userdata | default([]) }}" + disks: "{{ item.auto_volumes | json_query(\"[].{size_gb: volume_size, type: provisioning_type, volname: volname, src: src }\") | default([]) }}" + networks: "{{ cluster_vars[buildenv].networks | default([]) }}" + wait: true + register: esxi_instances + run_once: true + with_items: "{{ cluster_hosts_target }}" + async: 7200 + poll: 0 + +- name: Wait for instance creation to complete + async_status: + jid: "{{ item.ansible_job_id }}" + register: esxi_jobs + until: esxi_jobs.finished + retries: 300 + with_items: "{{ esxi_instances.results }}" + +#- debug: msg={{esxi_jobs.results}} diff --git a/dynamic_inventory/tasks/esxifree.yml b/dynamic_inventory/tasks/esxifree.yml new file mode 100644 index 00000000..a7e0d4fb --- /dev/null +++ b/dynamic_inventory/tasks/esxifree.yml @@ -0,0 +1,45 @@ +--- + +- name: dynamic_inventory/esxifree | Get existing VMware instance info + vmware_vm_info: + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + hostname: "{{ cluster_vars.esxi_ip }}" + validate_certs: no + register: r__vmware_vm_info + delegate_to: localhost + run_once: true + +- name: dynamic_inventory/esxifree | Get existing VMware instance facts + vmware_guest_info: + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + hostname: "{{ cluster_vars.esxi_ip }}" + validate_certs: no + datacenter: None + uuid: "{{item.uuid}}" + with_items: "{{ r__vmware_vm_info.virtual_machines | to_json | from_json | json_query(\"[?starts_with(guest_name, '\"+cluster_name+\"') && power_state=='poweredOn']\") }}" + register: r__vmware_guest_info + delegate_to: localhost + run_once: true + +## esxifree hosts must use the esxi 'annotations' field as json. They are stored as unconventional text in the vmx file, so must +## be converted into inline-json within the facts. If the annotation field is not convertible to json, then we don't consider this VM part of the cluster. 
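The comment above states the contract rather than the implementation: the vmx annotation text is parsed leniently, and any VM whose annotation does not come back as a dict or list is excluded from the cluster. A rough, hypothetical stand-in for the json_loads_loose filter (the real filter ships with the project's filter plugins and may behave differently):

    import json
    import yaml

    def json_loads_loose_sketch(annotation_text):
        # Hypothetical sketch only: try strict JSON first, then YAML (which tolerates
        # the looser quoting that survives the vmx annotation field); if neither
        # parses, return the raw string so the caller's dict/list type-check fails
        # and the VM is not treated as part of the cluster.
        try:
            return json.loads(annotation_text)
        except (ValueError, TypeError):
            try:
                return yaml.safe_load(annotation_text)
            except yaml.YAMLError:
                return annotation_text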
+- name: dynamic_inventory/esxifree | Update r__vmware_guest_info result with json-parsed annotations + set_fact: + r__vmware_guest_info: | + {% set res = {'results': []} -%} + {%- for result in r__vmware_guest_info.results -%} + {%- set loadloose_res = result.instance.annotation | json_loads_loose -%} + {%- if loadloose_res | type_debug == 'dict' or loadloose_res | type_debug == 'list' -%} + {%- set _ = result.instance.update({'annotation': loadloose_res}) -%} + {%- set _ = res.results.append(result) -%} + {%- endif -%} + {%- endfor -%} + {{ res }} + +#- debug: msg={{r__vmware_guest_info}} + +- name: dynamic_inventory/esxifree | Set dynamic_inventory_flat + set_fact: + dynamic_inventory_flat: "{{ r__vmware_guest_info.results | json_query(\"[].{hosttype: instance.annotation.hosttype, hostname: item.guest_name, private_ip: item.ip_address, inventory_ip: item.ip_address}\") | default([]) }}" diff --git a/dynamic_inventory/tasks/main.yml b/dynamic_inventory/tasks/main.yml index 06a7d102..4c041104 100644 --- a/dynamic_inventory/tasks/main.yml +++ b/dynamic_inventory/tasks/main.yml @@ -22,7 +22,7 @@ - name: dynamic_inventory | Add hosts to dynamic inventory add_host: name: "{{ item.hostname }}" - groups: "{{ item.hosttype }},{{ cluster_name }},{{ clusterid }}{% if 'regionzone' in item %},{{ item.regionzone }}{% endif %}" + groups: "{{ item.hosttype }},{{ cluster_name }},{{ clusterid }}{%- if 'regionzone' in item -%},{{ item.regionzone }}{%- endif -%}" ansible_host: "{{ item.inventory_ip }}" hosttype: "{{ item.hosttype }}" regionzone: "{{ item.regionzone | default(omit) }}" @@ -40,7 +40,7 @@ {% if groupname not in ["all", "ungrouped"] -%} [{{ groupname }}] {% for hostname in groups[groupname] %} - {{ hostname }} ansible_host={{hostvars[hostname].ansible_host}} hosttype={{ hostvars[hostname].hosttype }} {% if 'regionzone' in hostvars[hostname] %}regionzone={{ hostvars[hostname].regionzone }}{% endif %}{{''}} + {{ hostname }} ansible_host={{hostvars[hostname].ansible_host}} hosttype={{ hostvars[hostname].hosttype }} {%- if 'regionzone' in hostvars[hostname] -%}regionzone={{ hostvars[hostname].regionzone }}{%- endif -%} {% endfor %} {% endif %} diff --git a/redeploy/__common/tasks/poweroff_vms.yml b/redeploy/__common/tasks/poweroff_vms.yml index b6cdad56..38e06772 100644 --- a/redeploy/__common/tasks/poweroff_vms.yml +++ b/redeploy/__common/tasks/poweroff_vms.yml @@ -54,4 +54,28 @@ retries: 300 with_items: "{{r__gcp_compute_instance.results}}" when: cluster_vars.type == "gcp" + + + - name: poweroff_vms | Power-off vmware VM(s) and set maintenance_mode=true + block: + - name: poweroff_vms | Set maintenance_mode label on esxifree VM(s) + esxifree_guest: + hostname: "{{ cluster_vars.esxi_ip }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + name: "{{item.name}}" + state: present + annotation: "{{ item.tagslabels | combine({'maintenance_mode': 'true'}) }}" + with_items: "{{ hosts_to_stop }}" + + - name: poweroff_vms | Power-off esxifree VM(s) + esxifree_guest: + hostname: "{{ cluster_vars.esxi_ip }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + name: "{{item.name}}" + state: shutdownguest + with_items: "{{ hosts_to_stop }}" + + when: cluster_vars.type == "esxifree" when: hosts_to_stop | length \ No newline at end of file diff --git a/redeploy/__common/tasks/poweron_vms.yml b/redeploy/__common/tasks/poweron_vms.yml index 551c4fad..363a9aee 100644 --- a/redeploy/__common/tasks/poweron_vms.yml +++ 
b/redeploy/__common/tasks/poweron_vms.yml @@ -44,4 +44,16 @@ retries: 300 with_items: "{{r__gcp_compute_instance.results}}" when: cluster_vars.type == "gcp" + + + - name: poweron_vms | Power-on esxifree VM(s) + block: + - name: poweron_vms | Power-on esxifree VM(s) + esxifree_guest: + hostname: "{{ cluster_vars.esxi_ip }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + name: "{{item.name}}" + state: powered-on + when: cluster_vars.type == "esxifree" when: hosts_to_start | length \ No newline at end of file diff --git a/redeploy/__common/tasks/set_lifecycle_state_label.yml b/redeploy/__common/tasks/set_lifecycle_state_label.yml index 045171e2..93c582be 100644 --- a/redeploy/__common/tasks/set_lifecycle_state_label.yml +++ b/redeploy/__common/tasks/set_lifecycle_state_label.yml @@ -27,4 +27,16 @@ labels: "{{ item.tagslabels | combine({'lifecycle_state': new_state}) }}" with_items: "{{ hosts_to_relabel }}" when: cluster_vars.type == "gcp" + + + - name: set_lifecycle_state_label | Change lifecycle_state label on esxifree VM + esxifree_guest: + hostname: "{{ cluster_vars.esxi_ip }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + name: "{{item.name}}" + state: present + annotation: "{{ item.tagslabels | combine({'lifecycle_state': new_state}) }}" + with_items: "{{ hosts_to_relabel }}" + when: cluster_vars.type == "esxifree" when: hosts_to_relabel | length \ No newline at end of file diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_diskinfo_esxifree.yml b/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_diskinfo_esxifree.yml new file mode 100644 index 00000000..40e7c103 --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_diskinfo_esxifree.yml @@ -0,0 +1,36 @@ +--- + +- name: _get_diskinfo_esxifree | hosts_to_stop + debug: msg="{{hosts_to_stop}}" + +- name: _get_diskinfo_esxifree | vmware_guest_disk_info + vmware_guest_disk_info: + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + hostname: "{{ cluster_vars.esxi_ip }}" + datacenter: ha-datacenter + validate_certs: no + name: "{{item.name}}" + with_items: "{{hosts_to_stop}}" + register: r__vmware_guest_disk_info + +#- name: _get_diskinfo_esxifree | debug r__vmware_guest_disk_info +# debug: msg={{r__vmware_guest_disk_info}} + +- assert: { that: "r__vmware_guest_disk_info | json_query(\"results[].guest_disk_info.*[] | [?backing_datastore!='\" + cluster_vars.datastore + \"']\") | length == 0", msg: "Move is only possible if disks are on the same datastore." 
} + when: _scheme_rmvm_keepdisk_only__copy_or_move == "move" + +- name: _get_diskinfo_esxifree | augment cluster_host_redeploying's auto_volumes with source disk info + set_fact: + cluster_host_redeploying: | + {% set res = _cluster_host_redeploying_loopvar -%} + {%- for autovol in res.auto_volumes -%} + {%- for host_to_stop_diskinfo_result in r__vmware_guest_disk_info.results -%} + {%- if res.hostname | regex_replace('-(?!.*-).*') == host_to_stop_diskinfo_result.item.name | regex_replace('-(?!.*-).*') -%} + {%- for host_to_stop_diskinfo in host_to_stop_diskinfo_result.guest_disk_info | to_json | from_json | json_query('*| [?unit_number!=`0` && backing_type==\'FlatVer2\' && contains(backing_filename, \'--' + autovol.volname + '.vmdk\')]') -%} + {%- set _ = autovol.update({'volume_size': (host_to_stop_diskinfo.capacity_in_bytes/1073741824)|int, 'src': {'backing_filename': host_to_stop_diskinfo.backing_filename, 'copy_or_move': _scheme_rmvm_keepdisk_only__copy_or_move }}) -%} + {%- endfor -%} + {%- endif -%} + {%- endfor -%} + {%- endfor -%} + {{res}} diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype.yml b/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype.yml new file mode 100644 index 00000000..1bebc776 --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype.yml @@ -0,0 +1,23 @@ +--- + +- name: set hosts_to_redeploy if canary==start + set_fact: hosts_to_redeploy={{(cluster_hosts_target_by_hosttype[hosttype] | sort(attribute='hostname'))[:1]}} + when: (canary is defined and canary=="start") + +- name: set hosts_to_redeploy if canary==finish + set_fact: hosts_to_redeploy={{(cluster_hosts_target_by_hosttype[hosttype] | sort(attribute='hostname'))[1:]}} + when: (canary is defined and canary=="finish") + +- name: set hosts_to_redeploy if canary==none + set_fact: hosts_to_redeploy={{(cluster_hosts_target_by_hosttype[hosttype] | sort(attribute='hostname'))}} + when: (canary is defined and canary=="none") + +- debug: msg="Canary redeploy ({{canary}}) selected; deleting and redeploying [{{hosts_to_redeploy | json_query('[].hostname') | join(', ')}}]" + when: (canary is defined) + + +- name: Run redeploy per host. Delete one at a time, then reprovision. 
+ include_tasks: by_hosttype_by_host.yml + with_items: "{{ hosts_to_redeploy }}" + loop_control: + loop_var: _cluster_host_redeploying_loopvar diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype_by_host.yml b/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype_by_host.yml new file mode 100644 index 00000000..14276ef3 --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype_by_host.yml @@ -0,0 +1,51 @@ +--- + +- debug: msg="Attempting to redeploy {{_cluster_host_redeploying_loopvar.hostname}}" + +- name: by_hosttype_by_host | stop/ remove previous instance + block: + - name: by_hosttype_by_host | run predeleterole role + include_role: + name: "{{predeleterole}}" + when: predeleterole is defined and predeleterole != "" + + - name: by_hosttype_by_host | Power off old VM + include_role: + name: clusterverse/redeploy/__common + tasks_from: poweroff_vms.yml + + - name: by_hosttype_by_host | re-acquire the dynamic inventory + include_role: + name: clusterverse/dynamic_inventory + + - name: by_hosttype_by_host | re-acquire cluster_hosts_target and cluster_hosts_state + import_role: + name: clusterverse/cluster_hosts + + - name: by_hosttype_by_host | create cluster_host_redeploying with the disk info from hosts_to_stop + include_role: + name: "{{role_path}}" + tasks_from: "_add_diskinfo_{{cluster_vars.type}}.yml" + vars: + _root_cluster_host_redeploying: "{{_cluster_host_redeploying_loopvar.hostname | regex_replace('-(?!.*-).*')}}" #Remove the cluster_suffix from the hostname + hosts_to_stop: "{{ cluster_hosts_state | to_json | from_json | json_query(\"[?tagslabels.lifecycle_state=='retiring' && starts_with(name, '\" + _root_cluster_host_redeploying + \"')]\") }}" + +- name: by_hosttype_by_host | cluster_host_redeploying + debug: msg={{cluster_host_redeploying}} + +- name: "by_hosttype_by_host | Run {{mainclusteryml}} to add {{cluster_host_redeploying.hostname}} to cluster" + shell: "{{ (argv | join(' ')) | regex_replace('redeploy.yml', mainclusteryml) }} -e cluster_suffix={{cluster_suffix}} -e '{'cluster_hosts_target': [{{cluster_host_redeploying | to_json}}]}'" + register: r__mainclusteryml + no_log: True + ignore_errors: yes +- debug: msg="{{[r__mainclusteryml.stdout_lines] + [r__mainclusteryml.stderr_lines]}}" + failed_when: r__mainclusteryml is failed +# when: r__mainclusteryml is failed or (debug_nested_log_output is defined and debug_nested_log_output|bool) + +- name: by_hosttype_by_host | re-acquire the dynamic inventory + include_role: + name: clusterverse/dynamic_inventory + +- name: by_hosttype_by_host | re-acquire cluster_hosts_target and cluster_hosts_state + import_role: + name: clusterverse/cluster_hosts diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml b/redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml new file mode 100644 index 00000000..67aeba86 --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml @@ -0,0 +1,69 @@ +--- + +- name: Preflight check + block: + - assert: { that: "_scheme_rmvm_keepdisk_only__copy_or_move is defined and _scheme_rmvm_keepdisk_only__copy_or_move in ['copy', 'move']", msg: "ERROR - _scheme_rmvm_keepdisk_only__copy_or_move must be defined and set to either 'copy' or 'move'" } + + - assert: { that: "non_current_hosts | length == 0", msg: "ERROR - All VMs must be in the 'current' lifecycle_state. 
Those not [{{non_current_hosts | join(',')}}]" } + vars: + non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" + when: canary=="start" or canary=="none" + + - assert: + that: "{{chs_hosts | difference(chf_hosts) | length==0}}" + msg: "Cannot use this scheme to redeploy to smaller cluster; [{{ chs_hosts | join(',') }}] > [{{ chf_hosts | join(',') }}]" + vars: + chf_hosts: "{{ cluster_hosts_target | json_query(\"[].hostname\") | map('regex_replace', '-(?!.*-).*') | list }}" + chs_hosts: "{{ cluster_hosts_state | json_query(\"[].name\") | map('regex_replace', '-(?!.*-).*') | list }}" + +- name: Redeploy setup + block: + - name: Change lifecycle_state label from 'current' to 'retiring' + include_role: + name: clusterverse/redeploy/__common + tasks_from: set_lifecycle_state_label.yml + vars: + hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current']\") }}" + new_state: "retiring" + when: ('retiring' not in (cluster_hosts_state | map(attribute='tagslabels.lifecycle_state'))) + + - name: re-acquire cluster_hosts_target and cluster_hosts_state + include_role: + name: clusterverse/cluster_hosts + public: yes + + - assert: { that: "cluster_hosts_state | json_query(\"[?tagslabels.cluster_suffix == '\"+ cluster_suffix +\"']\") | length == 0", msg: "Please ensure cluster_suffix ({{cluster_suffix}}) is not already set on the cluster" } + when: cluster_suffix is defined + when: (canary=="start" or canary=="none") + +- name: Run redeploy per hosttype. Create one at a time, then stop previous. + include_tasks: by_hosttype.yml + with_items: "{{ myhosttypes_array }}" + loop_control: + loop_var: hosttype + vars: + cluster_hosts_target_by_hosttype: "{{cluster_hosts_target | dict_agg('hosttype')}}" + myhosttypes_array: "{%- if myhosttypes is defined -%} {{ myhosttypes.split(',') }} {%- else -%} {{ cluster_hosts_target_by_hosttype.keys() | list }} {%- endif -%}" + when: canary!="tidy" + + +- name: "Tidy up powered-down, non-current instances. NOTE: Must do clean_dns first, because both clean_dns and clean_vms have the cluster_hosts role as a dependency, which when run after clean_vms, will be empty." + block: + - assert: { that: "'current' in (cluster_hosts_state | map(attribute='tagslabels.lifecycle_state'))", msg: "ERROR - Cannot tidy when there are no machines in the 'current' lifecycle_state. Please use '-e clean=_all_'." } + + - include_role: + name: clusterverse/clean + tasks_from: clean_dns.yml + when: (hosts_to_clean | length) and (cluster_vars.dns_server is defined and cluster_vars.dns_server != "") and (cluster_vars.dns_user_domain is defined and cluster_vars.dns_user_domain != "") + + - include_role: + name: clusterverse/clean + tasks_from: "clean_vms_{{cluster_vars.type}}.yml" + when: (hosts_to_clean | length) + + - debug: + msg: "tidy | No hosts to tidy. Only powered-down, non-current machines with be tidied; to clean other machines, please use the '-e clean=' extra variable." 
+ when: hosts_to_clean | length == 0 + vars: + hosts_to_clean: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current' && !(contains('RUNNING,running', instance_state))]\") }}" + when: canary=="tidy" or ((canary=="none" or canary=="finish") and canary_tidy_on_success is defined and canary_tidy_on_success|bool) diff --git a/redeploy/tasks/main.yml b/redeploy/tasks/main.yml index bb4f62ee..e4788e94 100644 --- a/redeploy/tasks/main.yml +++ b/redeploy/tasks/main.yml @@ -4,7 +4,7 @@ block: - assert: { that: "clean is not defined", msg: "Must not set the 'clean' variable for a redeploy" } - assert: { that: "canary is defined and (canary is defined and canary in ['start', 'finish', 'none', 'tidy', 'revert'])", msg: "Canary must be 'start', 'finish', 'none', 'tidy' or 'revert'" } - - assert: { that: "redeploy_scheme is defined" } + - assert: { that: "redeploy_scheme is defined and redeploy_scheme in redeploy_schemes_supported" } - assert: { that: "cluster_hosts_state | length", msg: "Redeploy only possible with an existing cluster." } - name: "Run the {{redeploy_scheme}} redploy scheme" From 3bcd7c5d3f893a086d9e7cb489b548cd9fdcfde8 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Sun, 23 Aug 2020 07:36:15 +0100 Subject: [PATCH 24/58] Add esxifree_guest.py as an included library dependency --- _dependencies/library/esxifree_guest.py | 1071 +++++++++++++++++ _dependencies/library/esxifree_guest_LICENSE | 29 + .../library/esxifree_guest_README.md | 53 + 3 files changed, 1153 insertions(+) create mode 100644 _dependencies/library/esxifree_guest.py create mode 100644 _dependencies/library/esxifree_guest_LICENSE create mode 100644 _dependencies/library/esxifree_guest_README.md diff --git a/_dependencies/library/esxifree_guest.py b/_dependencies/library/esxifree_guest.py new file mode 100644 index 00000000..481dbbf9 --- /dev/null +++ b/_dependencies/library/esxifree_guest.py @@ -0,0 +1,1071 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) + +from __future__ import absolute_import, division, print_function + +__metaclass__ = type + +ANSIBLE_METADATA = {'metadata_version': '1.1', 'status': ['preview'], 'supported_by': 'community'} + +DOCUMENTATION = r''' +--- +module: esxifree_guest +short_description: Manages virtual machines in ESXi without a dependency on the vSphere/ vCenter API. +description: > + This module can be used to create new virtual machines from templates or other virtual machines, + manage power state of virtual machine such as power on, power off, suspend, shutdown, reboot, restart etc., +version_added: '2.7' +author: +- Dougal Seeley (ansible@dougalseeley.com) +requirements: +- python >= 2.7 +- paramiko +notes: + - Please make sure that the user used for esxifree_guest should have correct level of privileges. + - Tested on vSphere 6.7 +options: + hostname: + description: + - The hostname or IP address of the ESXi server. + required: true + type: str + username: + description: + - The username to access the ESXi server at C(hostname). + required: true + type: str + password: + description: + - The password of C(username) for the ESXi server, or the password for the private key (if required). + required: true + type: str + state: + description: + - Specify the state the virtual machine should be in. + - 'If C(state) is set to C(present) and virtual machine exists, ensure the virtual machine + configurations conforms to task arguments.' 
+ - 'If C(state) is set to C(absent) and virtual machine exists, then the specified virtual machine + is removed with its associated components.' + - 'If C(state) is set to one of the following C(poweredon), C(poweredoff), C(present) + and virtual machine does not exists, then virtual machine is deployed with given parameters.' + - 'If C(state) is set to C(poweredon) and virtual machine exists with powerstate other than powered on, + then the specified virtual machine is powered on.' + - 'If C(state) is set to C(poweredoff) and virtual machine exists with powerstate other than powered off, + then the specified virtual machine is powered off.' + - 'If C(state) is set to C(shutdownguest) and virtual machine exists, then the virtual machine is shutdown.' + - 'If C(state) is set to C(rebootguest) and virtual machine exists, then the virtual machine is rebooted.' + choices: [ present, absent, poweredon, poweredoff, shutdownguest, rebootguest ] + default: present + name: + description: + - Name of the virtual machine to work with. + - Virtual machine names in ESXi are unique + - This parameter is required, if C(state) is set to C(present) and virtual machine does not exists. + - This parameter is case sensitive. + type: str + moid: + description: + - Managed Object ID of the virtual machine to manage + - This is required if C(name) is not supplied. + - If virtual machine does not exists, then this parameter is ignored. + - Will be ignored on virtual machine creation + type: str + template: + description: + - Template or existing virtual machine used to create new virtual machine. + - If this value is not set, virtual machine is created without using a template. + - If the virtual machine already exists, this parameter will be ignored. + - This parameter is case sensitive. + type: str + hardware: + description: + - Manage virtual machine's hardware attributes. + type: dict + suboptions: + version: + description: + - The Virtual machine hardware version. Default is 15 (ESXi 6.7U2 and onwards). + type: int + default: 15 + required: false + num_cpus: + description: + - Number of CPUs. + - C(num_cpus) must be a multiple of C(num_cpu_cores_per_socket). + type: int + default: 2 + required: false + num_cpu_cores_per_socket: + description: + - Number of Cores Per Socket. + type: int + default: 1 + required: false + hotadd_cpu: + description: + - Allow virtual CPUs to be added while the virtual machine is running. + type: bool + required: false + memory_mb: + description: + - Amount of memory in MB. + type: int + default: 2048 + required: false + memory_reservation_lock: + description: + - If set true, memory resource reservation for the virtual machine + will always be equal to the virtual machine's memory size. + type: bool + required: false + hotadd_memory: + description: + - Allow memory to be added while the virtual machine is running. + type: bool + required: false + guest_id: + description: + - Set the guest ID. + - This parameter is case sensitive. + - 'Examples:' + - " virtual machine with RHEL7 64 bit, will be 'rhel7-64'" + - " virtual machine with CentOS 7 (64-bit), will be 'centos7-64'" + - " virtual machine with Debian 9 (Stretch) 64 bit, will be 'debian9-64'" + - " virtual machine with Ubuntu 64 bit, will be 'ubuntu-64'" + - " virtual machine with Windows 10 (64 bit), will be 'windows9-64'" + - " virtual machine with Other (64 bit), will be 'other-64'" + - This field is required when creating a virtual machine, not required when creating from the template. 
+ type: str + default: ubuntu-64 + disks: + description: + - A list of disks to add (or create via cloning). + - Resizing disks is not supported. + - Removing existing disks of the virtual machine is not supported. + required: false + type: list + suboptions: + boot: + description: + - Indicates that this is a boot disk. + required: false + default: no + type: bool + size_gb: + description: Specifies the size of the disk in base-2 GB. + type: int + required: true + type: + description: + - Type of disk provisioning + choices: [thin, thick, eagerzeroedthick] + type: str + required: false + default: thin + volname: + description: + - Volume name. This will be a suffix of the vmdk file, e.g. "testdisk" on a VM named "mynewvm", would yield mynewvm--testdisk.vmdk + type: str + required: true + src: + description: + - The source disk from which to create this disk. + required: false + type: dict + suboptions: + backing_filename: + description: + - The source file, e.g. "[datastore1] linux_dev/linux_dev--webdata.vmdk" + type: str + copy_or_move + description: + - Whether to copy (clone) from the source datastore, or move the file. Move will fail if source and destination datastore differ. + choices: [copy, move] + + cdrom: + description: + - A CD-ROM configuration for the virtual machine. + - 'Valid attributes are:' + - ' - C(type) (string): The type of CD-ROM, valid options are C(none), C(client) or C(iso). With C(none) the CD-ROM will be disconnected but present.' + - ' - C(iso_path) (string): The datastore path to the ISO file to use, in the form of C([datastore1] path/to/file.iso). Required if type is set C(iso).' + wait: + description: + - On creation, wait for the instance to obtain its IP address before returning. + type: bool + required: false + default: true + wait_timeout: + description: + - How long before wait gives up, in seconds. + type: int + required: false + default: 180 + force: + description: + - Delete the existing host if it exists. Use with extreme care! + type: bool + required: false + default: false + customvalues: + description: + - Define a list of custom values to set on virtual machine. + - A custom value object takes two fields C(key) and C(value). + - Incorrect key and values will be ignored. + version_added: '2.3' + cloudinit_userdata: + description: + - A list of userdata (per user) as defined U(https://cloudinit.readthedocs.io/en/latest/topics/examples.html). The + VM must already have cloud-init-vmware-guestinfo installed U(https://github.com/vmware/cloud-init-vmware-guestinfo) + networks: + description: + - A list of networks (in the order of the NICs). + - Removing NICs is not allowed, while reconfiguring the virtual machine. + - All parameters and VMware object names are case sensetive. + - 'One of the below parameters is required per entry:' + - ' - C(networkName) (string): Name of the portgroup for this interface. + - ' - C(virtualDev) (string): Virtual network device (one of C(e1000e), C(vmxnet3) (default), C(sriov)).' + - 'Optional parameters per entry (used for OS customization):' + - ' - C(cloudinit_ethernets) (dict): A list of C(ethernets) within the definition of C(Networking Config Version 2) + defined in U(https://cloudinit.readthedocs.io/en/latest/topics/network-config-format-v2.html)'. The + VM must already have cloud-init-vmware-guestinfo installed U(https://github.com/vmware/cloud-init-vmware-guestinfo) + datastore: + description: + - Specify datastore or datastore cluster to provision virtual machine. 
+ type: str + required: true + +''' +EXAMPLES = r''' +- name: Create a virtual machine + esxifree_guest: + hostname: "192.168.1.3" + username: "svc" + password: "my_passsword" + datastore: "datastore1" + name: "test_asdf" + state: present + guest_id: ubuntu-64 + hardware: {"version": "15", "num_cpus": "2", "memory_mb": "2048"} + cloudinit_userdata: + - name: dougal + primary_group: dougal + sudo: "ALL=(ALL) NOPASSWD:ALL" + groups: "admin" + home: "/media/filestore/home/dougal" + ssh_import_id: None + lock_passwd: false + passwd: $6$j212wezy$7...YPYb2F + ssh_authorized_keys: ['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACA+.................GIMhdojtl6mzVn38vXMzSL29LQ== ansible@dougalseeley.com'] + disks: + - {"boot": true, "size_gb": 16, "type": "thin"} + - {"size_gb": 2, "type": "thin", "volname": "test_new"} + - {"size_gb": 1, "type": "thin", "volname": "test_clone", "src": {"backing_filename": "[datastore1] linux_dev/linux_dev--webdata.vmdk", "copy_or_move": "copy"}}], + cdrom: {"type": "iso", "iso_path": "/vmfs/volumes/4tb-evo860-ssd/ISOs/ubuntu-18.04.4-server-amd64.iso"}, + networks: + - networkName: VM Network + virtualDev: vmxnet3 + cloudinit_ethernets: + eth0: + addresses: ["192.168.1.8/25"] + dhcp4: false + gateway4: 192.168.1.1 + nameservers: + addresses: ["192.168.1.2", "8.8.8.8", "8.8.4.4"] + search: ["local.dougalseeley.com"] + delegate_to: localhost + +- name: Clone a virtual machine + esxifree_guest: + hostname: "192.168.1.3" + username: "svc" + password: "my_passsword" + datastore: "datastore1" + template: "ubuntu1804-packer-template" + name: "test_asdf" + state: present + guest_id: ubuntu-64 + hardware: {"version": "15", "num_cpus": "2", "memory_mb": "2048"} + cloudinit_userdata: + - default + - name: dougal + primary_group: dougal + sudo: "ALL=(ALL) NOPASSWD:ALL" + groups: "admin" + home: "/media/filestore/home/dougal" + ssh_import_id: None + lock_passwd: true + ssh_authorized_keys: ['ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACA+.................GIMhdojtl6mzVn38vXMzSL29LQ== ansible@dougalseeley.com'] + disks: + - {"size_gb": 2, "type": "thin", "volname": "test_new"} + - {"size_gb": 1, "type": "thin", "volname": "test_clone", "src": {"backing_filename": "[datastore1] linux_dev/linux_dev--webdata.vmdk", "copy_or_move": "copy"}}], + networks: + - networkName: VM Network + virtualDev: vmxnet3 + cloudinit_ethernets: + eth0: + addresses: ["192.168.1.8/25"] + dhcp4: false + gateway4: 192.168.1.1 + nameservers: + addresses: ["192.168.1.2", "8.8.8.8", "8.8.4.4"] + search: ["local.dougalseeley.com"] + delegate_to: localhost + +- name: Delete a virtual machine + esxifree_guest: + hostname: "{{ esxi_ip }}" + username: "{{ username }}" + password: "{{ password }}" + name: test_vm_0001 + state: absent + delegate_to: localhost +''' + +RETURN = r''' +instance: + description: metadata about the new virtual machine + returned: always + type: dict + sample: None +''' + +import time +import re +import json +import socket +import collections +import paramiko +import sys +import base64 +import yaml +import errno # For the python2.7 IOError, because FileNotFound is for python3 + +# define a custom yaml representer to force quoted strings +yaml.add_representer(str, lambda dumper, data: dumper.represent_scalar('tag:yaml.org,2002:str', data, style='"')) + +# For the soap client +try: + from urllib.request import Request, build_opener, HTTPSHandler, HTTPCookieProcessor + from urllib.response import addinfourl + from urllib.error import HTTPError + from http.cookiejar import CookieJar + from http.client import 
HTTPResponse +except ImportError: + from urllib2 import Request, build_opener, HTTPError, HTTPSHandler, HTTPCookieProcessor, addinfourl + from cookielib import CookieJar + from httplib import HTTPResponse +import ssl +import xml.dom.minidom + +if sys.version_info[0] < 3: + from io import BytesIO as StringIO +else: + from io import StringIO + +# paramiko.util.log_to_file("paramiko.log") +# paramiko.common.logging.basicConfig(level=paramiko.common.DEBUG) + +try: + from ansible.module_utils.basic import AnsibleModule +except: + pass + + +# Executes soap requests on the remote host. +class vmw_soap_client(object): + def __init__(self, host, username, password): + self.vmware_soap_session_cookie = None + self.host = host + response, cookies = self.send_req("<_this>ServiceInstance") + sessionManager_name = xml.dom.minidom.parseString(response.read()).getElementsByTagName("sessionManager")[0].firstChild.data + + response, cookies = self.send_req("<_this>" + sessionManager_name + "" + username + "" + password + "") + self.vmware_soap_session_cookie = cookies['vmware_soap_session'].value + + def send_req(self, envelope_body=None): + envelope = '' + '' + str(envelope_body) + '' + cj = CookieJar() + req = Request( + url='https://' + self.host + '/sdk/vimService.wsdl', data=envelope.encode(), + headers={"Content-Type": "text/xml", "SOAPAction": "urn:vim25/6.7.3", "Accept": "*/*", "Cookie": "vmware_client=VMware; vmware_soap_session=" + str(self.vmware_soap_session_cookie)}) + + opener = build_opener(HTTPSHandler(context=ssl._create_unverified_context()), HTTPCookieProcessor(cj)) + try: + response = opener.open(req, timeout=30) + except HTTPError as err: + response = str(err) + cookies = {i.name: i for i in list(cj)} + return (response[0] if isinstance(response, list) else response, cookies) # If the cookiejar contained anything, we get a list of two responses + + def wait_for_task(self, task, timeout=30): + time_s = int(timeout) + while time_s > 0: + response, cookies = self.send_req('<_this type="PropertyCollector">ha-property-collectorTaskfalseinfo' + task + 'false') + if isinstance(response, HTTPResponse) or isinstance(response, addinfourl): + xmldom = xml.dom.minidom.parseString(response.read()) + if len(xmldom.getElementsByTagName('state')): + if xmldom.getElementsByTagName('state')[0].firstChild.data == 'success': + response = xmldom.getElementsByTagName('state')[0].firstChild.data + break + elif xmldom.getElementsByTagName('state')[0].firstChild.data == 'error': + response = str(xmldom.toxml()) + break + else: + time.sleep(1) + time_s = time_s - 1 + else: + break + return response + + +# Executes a command on the remote host. 
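# Illustrative (assumed) usage of the helper below, mirroring how esxiFreeScraper
# drives it elsewhere in this module -- connect over SSH, run a vim-cmd, read stdout:
#
#     cnx = SSHCmdExec(hostname="192.168.1.3", username="svc", password="...")
#     stdin, stdout, stderr = cnx.exec_command("vim-cmd vmsvc/getallvms")
#     print(stdout.read().decode("UTF-8"))
#
# exec_command() raises IOError when the remote command exits non-zero, so callers
# can rely on an exception rather than checking the exit status themselves.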
+class SSHCmdExec(object): + def __init__(self, hostname, username=None, password=None, pkeyfile=None, pkeystr=None): + self.hostname = hostname + + try: + if pkeystr and pkeystr != "": + pkey_fromstr = paramiko.RSAKey.from_private_key(StringIO(pkeystr), password) + if pkeyfile and pkeyfile != "": + pkey_fromfile = paramiko.RSAKey.from_private_key_file(pkeyfile, password) + except paramiko.ssh_exception.PasswordRequiredException as auth_err: + print("Authentication failure, Password required" + "\n\n" + str(auth_err)) + exit(1) + except paramiko.ssh_exception.SSHException as auth_err: + print("Authentication failure, SSHException" + "\n\n" + str(auth_err)) + exit(1) + except: + print("Unexpected error: ", sys.exc_info()[0]) + raise + else: + if pkeystr: + self.pkey = pkey_fromstr + if pkeyfile: + if pkey_fromstr != pkey_fromfile: + print("Both private key file and private key string specified and not equal!") + exit(1) + elif pkeyfile: + self.pkey = pkey_fromfile + + # Create instance of SSHClient object + self.remote_conn_client = paramiko.SSHClient() + self.remote_conn_client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + + # initiate SSH connection + try: + if hasattr(self, 'pkey'): + self.remote_conn_client.connect(hostname=hostname, username=username, pkey=self.pkey, timeout=10, look_for_keys=False, allow_agent=False) + else: + self.remote_conn_client.connect(hostname=hostname, username=username, password=password, timeout=10, look_for_keys=False, allow_agent=False) + except socket.error as sock_err: + print("Connection timed-out to " + hostname) # + "\n\n" + str(sock_err) + exit(1) + except paramiko.ssh_exception.AuthenticationException as auth_err: + print("Authentication failure, unable to connect to " + hostname + " as " + username + "\n\n" + str(auth_err) + "\n\n" + str(sys.exc_info()[0])) # + str(auth_err)) + exit(1) + except: + print("Unexpected error: ", sys.exc_info()[0]) + raise + + # print("SSH connection established to " + hostname + " as " + username) + + def get_sftpClient(self): + return self.remote_conn_client.open_sftp() + + # execute the command and wait for it to finish + def exec_command(self, command_string): + # print("Command is: {0}".format(command_string)) + + (stdin, stdout, stderr) = self.remote_conn_client.exec_command(command_string) + if stdout.channel.recv_exit_status() != 0: # Blocking call + raise IOError(stderr.read()) + + return stdin, stdout, stderr + + +class esxiFreeScraper(object): + vmx_skeleton = collections.OrderedDict() + vmx_skeleton['.encoding'] = "UTF-8" + vmx_skeleton['config.version'] = "8" + vmx_skeleton['pciBridge0.present'] = "TRUE" + vmx_skeleton['svga.present'] = "TRUE" + vmx_skeleton['svga.autodetect'] = "TRUE" + vmx_skeleton['pciBridge4.present'] = "TRUE" + vmx_skeleton['pciBridge4.virtualDev'] = "pcieRootPort" + vmx_skeleton['pciBridge4.functions'] = "8" + vmx_skeleton['pciBridge5.present'] = "TRUE" + vmx_skeleton['pciBridge5.virtualDev'] = "pcieRootPort" + vmx_skeleton['pciBridge5.functions'] = "8" + vmx_skeleton['pciBridge6.present'] = "TRUE" + vmx_skeleton['pciBridge6.virtualDev'] = "pcieRootPort" + vmx_skeleton['pciBridge6.functions'] = "8" + vmx_skeleton['pciBridge7.present'] = "TRUE" + vmx_skeleton['pciBridge7.virtualDev'] = "pcieRootPort" + vmx_skeleton['pciBridge7.functions'] = "8" + vmx_skeleton['vmci0.present'] = "TRUE" + vmx_skeleton['hpet0.present'] = "TRUE" + vmx_skeleton['floppy0.present'] = "FALSE" + vmx_skeleton['usb.present'] = "TRUE" + vmx_skeleton['ehci.present'] = "TRUE" + 
vmx_skeleton['tools.syncTime'] = "TRUE" + vmx_skeleton['scsi0.virtualDev'] = "pvscsi" + vmx_skeleton['scsi0.present'] = "TRUE" + + def __init__(self, hostname, username='root', password=None, name=None, moid=None): + self.soap_client = vmw_soap_client(host=hostname, username=username, password=password) + self.esxiCnx = SSHCmdExec(hostname=hostname, username=username, password=password) + self.name, self.moid = self.get_vm(name, moid) + if self.moid is None: + self.name = name + + def get_vm(self, name=None, moid=None): + (stdin, stdout, stderr) = self.esxiCnx.exec_command("vim-cmd vmsvc/getallvms") + allVms = stdout.readlines() + for vm in allVms: + vm_params = re.search('^(?P\d+)\s+(?P.*?)\s+(?P\[.*?\])\s+(?P.*?)\s+(?P.*?)\s+(?P.*?)(:\s+(?P.*))?$', vm) + if vm_params and vm_params.group('vmname') and vm_params.group('vmid') and ((name and name == vm_params.group('vmname')) or (moid and moid == vm_params.group('vmid'))): + return vm_params.group('vmname'), vm_params.group('vmid') + return None, None + + def get_vmx(self, moid): + (stdin, stdout, stderr) = self.esxiCnx.exec_command("vim-cmd vmsvc/get.filelayout " + str(moid) + " | grep 'vmPathName = ' | sed -r 's/^\s+vmPathName = \"(.*?)\",/\\1/g'") + vmxPathName = stdout.read().decode('UTF-8').lstrip("\r\n").rstrip(" \r\n") + vmxPath = re.sub(r"^\[(.*?)]\s+(.*?)$", r"/vmfs/volumes/\1/\2", vmxPathName) + + if vmxPath: + sftp_cnx = self.esxiCnx.get_sftpClient() + vmxFileDict = {} + for vmxline in sftp_cnx.file(vmxPath).readlines(): + vmxline_params = re.search('^(?P.*?)\s*=\s*(?P.*)$', vmxline) + if vmxline_params and vmxline_params.group('key') and vmxline_params.group('value'): + vmxFileDict[vmxline_params.group('key').strip(" \"\r\n").lower()] = vmxline_params.group('value').strip(" \"\r\n") + + return vmxPath, vmxFileDict + + def put_vmx(self, vmxDict, vmxPath): + # print(json.dumps(vmxDict, sort_keys=True, indent=4, separators=(',', ': '))) + vmxDict = collections.OrderedDict(sorted(vmxDict.items())) + vmxStr = StringIO() + for vmxKey, vmxVal in vmxDict.items(): + vmxStr.write(str(vmxKey.lower()) + " = " + "\"" + str(vmxVal) + "\"\n") + vmxStr.seek(0) + sftp_cnx = self.esxiCnx.get_sftpClient() + try: + sftp_cnx.stat(vmxPath) + sftp_cnx.remove(vmxPath) + except IOError as e: # python 2.7 + if e.errno == errno.ENOENT: + pass + except FileNotFoundError: # python 3.x + pass + sftp_cnx.putfo(vmxStr, vmxPath, file_size=0, callback=None, confirm=True) + + def create_vm(self, vmTemplate=None, annotation=None, datastore=None, hardware=None, guest_id=None, disks=None, cdrom=None, customvalues=None, networks=None, cloudinit_userdata=None): + vmPathDest = "/vmfs/volumes/" + datastore + "/" + self.name + + ## Sanity checks + for dryRunDisk in [newDisk for newDisk in disks if ('src' in newDisk and newDisk['src'] is not None)]: + if 'copy_or_move' not in dryRunDisk['src']: + return ("'copy_or_move' parameter is mandatory when src is specified for a disk.") + if 'backing_filename' not in dryRunDisk['src']: + return ("'backing_filename' parameter is mandatory when src is specified for a disk.") + + dryRunDiskFileInfo = re.search('^\[(?P.*?)\] *(?P.*\/(?P(?P.*?)(?:--(?P.*?))?\.vmdk))$', dryRunDisk['src']['backing_filename']) + try: + self.esxiCnx.exec_command("vmkfstools -g /vmfs/volumes/" + dryRunDiskFileInfo.group('datastore') + "/" + dryRunDiskFileInfo.group('fulldiskpath')) + except IOError as e: + return "'" + dryRunDisk['src']['backing_filename'] + "' is not accessible (is the VM turned on?)\n" + str(e) + + # Create VM directory + 
self.esxiCnx.exec_command("mkdir -p " + vmPathDest) + + vmxDict = collections.OrderedDict(esxiFreeScraper.vmx_skeleton) + + diskCount = 0 + + # First apply any vmx settings from the template. + # These will be overridden by explicit configuration. + if vmTemplate: + template_name, template_moid = self.get_vm(vmTemplate, None) + if template_moid: + template_vmxPath, template_vmxDict = self.get_vmx(template_moid) + + # Generic settings + vmxDict.update({"guestos": template_vmxDict['guestos']}) + + # Hardware settings + vmxDict.update({"virtualhw.version": template_vmxDict['virtualhw.version']}) + vmxDict.update({"memsize": template_vmxDict['memsize']}) + if 'numvcpus' in template_vmxDict: + vmxDict.update({"numvcpus": template_vmxDict['numvcpus']}) + if 'cpuid.coresPerSocket' in template_vmxDict: + vmxDict.update({"cpuid.coresPerSocket": template_vmxDict['cpuid.coresPerSocket']}) + if 'vcpu.hotadd' in template_vmxDict: + vmxDict.update({"vcpu.hotadd": template_vmxDict['vcpu.hotadd']}) + if 'mem.hotadd' in template_vmxDict: + vmxDict.update({"mem.hotadd": template_vmxDict['mem.hotadd']}) + if 'sched.mem.pin' in template_vmxDict: + vmxDict.update({"sched.mem.pin": template_vmxDict['sched.mem.pin']}) + + # Network settings + netCount = 0 + while "ethernet" + str(netCount) + ".virtualdev" in template_vmxDict: + vmxDict.update({"ethernet" + str(netCount) + ".virtualdev": template_vmxDict["ethernet" + str(netCount) + ".virtualdev"]}) + vmxDict.update({"ethernet" + str(netCount) + ".networkname": template_vmxDict["ethernet" + str(netCount) + ".networkname"]}) + vmxDict.update({"ethernet" + str(netCount) + ".addresstype": "generated"}) + vmxDict.update({"ethernet" + str(netCount) + ".present": "TRUE"}) + netCount = netCount + 1 + + ### Disk cloning - clone all disks from source + response, cookies = self.soap_client.send_req('<_this type="PropertyCollector">ha-property-collectorVirtualMachinefalselayout' + str(template_moid) + 'false') + xmldom = xml.dom.minidom.parseString(response.read()) + srcDiskFiles = [data.firstChild.data for data in xmldom.getElementsByTagName("diskFile")] + + for srcDiskFile in srcDiskFiles: + srcDiskFileInfo = re.search('^\[(?P.*?)\] *(?P.*\/(?P(?P.*?)(?:--(?P.*?))?\.vmdk))$', srcDiskFile) + diskTypeKey = next((key for key, val in template_vmxDict.items() if val == srcDiskFileInfo.group('filepath')), None) + + if re.search('scsi', diskTypeKey): + controllerTypeStr = "scsi0:" + else: + controllerTypeStr = "sata0:" + + # See if vmTemplate disk exists + try: + (stdin, stdout, stderr) = self.esxiCnx.exec_command("stat /vmfs/volumes/" + srcDiskFileInfo.group('datastore') + "/" + srcDiskFileInfo.group('fulldiskpath')) + except IOError as e: + return (srcDiskFileInfo.group('fulldiskpath') + " not found!") + else: + if diskCount == 0: + disk_filename = self.name + "--boot.vmdk" + else: + if 'diskname_suffix' in srcDiskFileInfo.groupdict() and srcDiskFileInfo.group('diskname_suffix'): + disk_filename = self.name + "--" + srcDiskFileInfo.group('diskname_suffix') + ".vmdk" + else: + disk_filename = self.name + ".vmdk" + self.esxiCnx.exec_command("vmkfstools -i /vmfs/volumes/" + srcDiskFileInfo.group('datastore') + "/" + srcDiskFileInfo.group('fulldiskpath') + " -d thin " + vmPathDest + "/" + disk_filename) + + vmxDict.update({controllerTypeStr + str(diskCount) + ".devicetype": "scsi-hardDisk"}) + vmxDict.update({controllerTypeStr + str(diskCount) + ".present": "TRUE"}) + vmxDict.update({controllerTypeStr + str(diskCount) + ".filename": disk_filename}) + diskCount = diskCount + 1 + + 
else: + return (vmTemplate + " not found!") + + ## Now add remaining settings, overriding template copies. + + # Generic settings + if guest_id: + vmxDict.update({"guestos": guest_id}) + vmxDict.update({"displayname": self.name}) + vmxDict.update({"vm.createdate": time.time()}) + + if annotation: + vmxDict.update({"annotation": annotation}) + + # Hardware settings + if 'version' in hardware: + vmxDict.update({"virtualhw.version": hardware['version']}) + if 'memory_mb' in hardware: + vmxDict.update({"memsize": hardware['memory_mb']}) + if 'num_cpus' in hardware: + vmxDict.update({"numvcpus": hardware['num_cpus']}) + if 'num_cpu_cores_per_socket' in hardware: + vmxDict.update({"cpuid.coresPerSocket": hardware['num_cpu_cores_per_socket']}) + if 'hotadd_cpu' in hardware: + vmxDict.update({"vcpu.hotadd": hardware['hotadd_cpu']}) + if 'hotadd_memory' in hardware: + vmxDict.update({"mem.hotadd": hardware['hotadd_memory']}) + if 'memory_reservation_lock' in hardware: + vmxDict.update({"sched.mem.pin": hardware['memory_reservation_lock']}) + + # CDROM settings + if cdrom['type'] == 'client': + (stdin, stdout, stderr) = self.esxiCnx.exec_command("find /vmfs/devices/cdrom/ -mindepth 1 ! -type l") + cdrom_dev = stdout.read().decode('UTF-8').lstrip("\r\n").rstrip(" \r\n") + vmxDict.update({"ide0:0.devicetype": "atapi-cdrom"}) + vmxDict.update({"ide0:0.filename": cdrom_dev}) + vmxDict.update({"ide0:0.present": "TRUE"}) + elif cdrom['type'] == 'iso': + if 'iso_path' in cdrom: + vmxDict.update({"ide0:0.devicetype": "cdrom-image"}) + vmxDict.update({"ide0:0.filename": cdrom['iso_path']}) + vmxDict.update({"ide0:0.present": "TRUE"}) + vmxDict.update({"ide0:0.startconnected": "TRUE"}) + + # Network settings + cloudinit_nets = {"version": 2} + for netCount in range(0, len(networks)): + vmxDict.update({"ethernet" + str(netCount) + ".virtualdev": networks[netCount]['virtualDev']}) + vmxDict.update({"ethernet" + str(netCount) + ".networkname": networks[netCount]['networkName']}) + if "macAddress" in networks[netCount]: + vmxDict.update({"ethernet" + str(netCount) + ".addresstype": "static"}) + vmxDict.update({"ethernet" + str(netCount) + ".address": networks[netCount]['macAddress']}) + vmxDict.update({"ethernet" + str(netCount) + ".checkmacaddress": "FALSE"}) + else: + vmxDict.update({"ethernet" + str(netCount) + ".addresstype": "generated"}) + vmxDict.update({"ethernet" + str(netCount) + ".present": "TRUE"}) + if "cloudinit_netplan" in networks[netCount]: + cloudinit_nets.update(networks[netCount]['cloudinit_netplan']) + + # Add cloud-init metadata (hostname & network) + cloudinit_metadata = {"local-hostname": self.name} + if cloudinit_nets['ethernets'].keys(): + # Force guest to use the MAC address as the DHCP identifier, in case the machine-id is not reset for each clone + for cloudeth in cloudinit_nets['ethernets'].keys(): + cloudinit_nets['ethernets'][cloudeth].update({"dhcp-identifier": "mac"}) + # Add the metadata + cloudinit_metadata.update({"network": base64.b64encode(yaml.dump(cloudinit_nets, width=4096, encoding='utf-8')).decode('ascii'), "network.encoding": "base64"}) + vmxDict.update({"guestinfo.metadata": base64.b64encode(yaml.dump(cloudinit_metadata, width=4096, encoding='utf-8')).decode('ascii'), "guestinfo.metadata.encoding": "base64"}) + + # Add cloud-init userdata (must be in MIME multipart format) + if cloudinit_userdata and len(cloudinit_userdata): + import sys + from email.mime.multipart import MIMEMultipart + from email.mime.text import MIMEText + combined_message = MIMEMultipart() + 
sub_message = MIMEText(yaml.dump({"users": cloudinit_userdata}, width=4096, encoding='utf-8'), "cloud-config", sys.getdefaultencoding())
+            sub_message.add_header('Content-Disposition', 'attachment; filename="cloud-config.yaml"')
+            combined_message.attach(sub_message)
+            if sys.version_info >= (3, 0):
+                vmxDict.update({"guestinfo.userdata": base64.b64encode(combined_message.as_bytes()).decode('ascii'), "guestinfo.userdata.encoding": "base64"})
+            else:
+                vmxDict.update({"guestinfo.userdata": base64.b64encode(combined_message.as_string()).decode('ascii'), "guestinfo.userdata.encoding": "base64"})
+
+        ### Disk create
+        # If the first disk doesn't exist, create it
+        bootDisks = [bootDisk for bootDisk in disks if 'boot' in bootDisk]
+        if len(bootDisks) > 1:
+            return ("Multiple boot disks not allowed")
+
+        if "scsi0:0.filename" not in vmxDict:
+            if len(bootDisks) == 1:
+                disk_filename = self.name + "--boot.vmdk"
+                (stdin, stdout, stderr) = self.esxiCnx.exec_command("vmkfstools -c " + str(bootDisks[0]['size_gb']) + "G -d " + bootDisks[0]['type'] + " " + vmPathDest + "/" + disk_filename)
+
+                vmxDict.update({"scsi0:0.devicetype": "scsi-hardDisk"})
+                vmxDict.update({"scsi0:0.present": "TRUE"})
+                vmxDict.update({"scsi0:0.filename": disk_filename})
+                diskCount = diskCount + 1
+            if len(bootDisks) == 0:
+                return ("Boot disk parameters not defined for new VM")
+        else:
+            if len(bootDisks) == 1:
+                return ("Boot disk parameters defined for cloned VM. Ambiguous requirement - not supported.")
+
+        newDisks = [newDisk for newDisk in disks if 'boot' not in newDisk]
+        for newDiskCount, newDisk in enumerate(newDisks):
+            scsiDiskIdx = newDiskCount + diskCount
+            disk_filename = self.name + "--" + newDisk['volname'] + ".vmdk"
+
+            # Check if new disk already exists - if so, exit
+            try:
+                (stdin, stdout, stderr) = self.esxiCnx.exec_command("stat " + vmPathDest + "/" + disk_filename)
+            except IOError as e:
+                if 'src' in newDisk and newDisk['src'] is not None:
+                    cloneSrcBackingFile = re.search('^\[(?P.*?)\] *(?P.*\/(?P(?P.*?)(?:--(?P.*?))?\.vmdk))$', newDisk['src']['backing_filename'])
+                    try:
+                        (stdin, stdout, stderr) = self.esxiCnx.exec_command("stat /vmfs/volumes/" + cloneSrcBackingFile.group('datastore') + "/" + cloneSrcBackingFile.group('fulldiskpath'))
+                    except IOError as e:
+                        return (cloneSrcBackingFile.group('fulldiskpath') + " not found!\n" + str(e))
+                    else:
+                        if newDisk['src']['copy_or_move'] == 'copy':
+                            self.esxiCnx.exec_command("vmkfstools -i /vmfs/volumes/" + cloneSrcBackingFile.group('datastore') + "/" + cloneSrcBackingFile.group('fulldiskpath') + " -d thin " + vmPathDest + "/" + disk_filename)
+                        else:
+                            self.esxiCnx.exec_command("vmkfstools -E /vmfs/volumes/" + cloneSrcBackingFile.group('datastore') + "/" + cloneSrcBackingFile.group('fulldiskpath') + " " + vmPathDest + "/" + disk_filename)
+
+                else:
+                    (stdin, stdout, stderr) = self.esxiCnx.exec_command("vmkfstools -c " + str(newDisk['size_gb']) + "G -d " + newDisk['type'] + " " + vmPathDest + "/" + disk_filename)
+
+                vmxDict.update({"scsi0:" + str(scsiDiskIdx) + ".devicetype": "scsi-hardDisk"})
+                vmxDict.update({"scsi0:" + str(scsiDiskIdx) + ".present": "TRUE"})
+                vmxDict.update({"scsi0:" + str(scsiDiskIdx) + ".filename": disk_filename})
+                diskCount = diskCount + 1
+            else:
+                return (disk_filename + " already present!")
+
+        # write the vmx
+        self.put_vmx(vmxDict, vmPathDest + "/" + self.name + ".vmx")
+
+        # Register the VM
+        (stdin, stdout, stderr) = self.esxiCnx.exec_command("vim-cmd solo/registervm " + vmPathDest + "/" + self.name + ".vmx")
+        self.moid = 
int(stdout.readlines()[0]) + + def update_vm(self, annotation=None): + if annotation: + # Update the config (annotation) in the running VM + response, cookies = self.soap_client.send_req('<_this type="VirtualMachine">' + str(self.moid) + '' + annotation + '') + waitresp = self.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data) + if waitresp != 'success': + return ("Failed to ReconfigVM_Task: %s" % waitresp) + + # Now update the disk (should not be necessary, but for some reason, sometimes the ReconfigVM_Task does not flush config to disk). + vmxPath, vmxDict = self.get_vmx(self.moid) + vmxDict.update({"annotation": annotation}) + self.put_vmx(vmxDict, vmxPath) + + # def update_vm_pyvmomi(self, annotation=None): + # if annotation: + # from pyVmomi import vim + # from pyVim.task import WaitForTask + # from pyVim import connect + # + # SI = connect.SmartConnectNoSSL(host=hostname, user=username, pwd=password, port=443) + # vm = SI.content.searchIndex.FindByDnsName(None, self.name, True) + # + # spec = vim.vm.ConfigSpec() + # spec.annotation = annotation + # task = vm.ReconfigVM_Task(spec) + # WaitForTask(task) + + # Delete the cloud-init guestinfo.metadata info from the .vmx file, otherwise it will be impossible to change the network configuration or hostname. + def delete_cloudinit(self): + vmxPath, vmxDict = self.get_vmx(self.moid) + if 'guestinfo.metadata' in vmxDict: + del vmxDict['guestinfo.metadata'] + if 'guestinfo.metadata.encoding' in vmxDict: + del vmxDict['guestinfo.metadata.encoding'] + if 'guestinfo.userdata' in vmxDict: + del vmxDict['guestinfo.userdata'] + if 'guestinfo.userdata.encoding' in vmxDict: + del vmxDict['guestinfo.userdata.encoding'] + + # write the vmx + self.put_vmx(vmxDict, vmxPath) + + +def main(): + argument_spec = { + "hostname": {"type": "str", "required": True}, + "username": {"type": "str", "required": True}, + "password": {"type": "str"}, + "name": {"type": "str"}, + "moid": {"type": "str"}, + "template": {"type": "str"}, + "state": {"type": "str", "default": 'present', "choices": ['absent', 'present', 'rebootguest', 'poweredon', 'poweredoff', 'shutdownguest']}, + "force": {"type": "bool", "default": False}, + "datastore": {"type": "str"}, + "annotation": {"type": "str", "default": ""}, + "guest_id": {"type": "str", "default": "ubuntu-64"}, + "hardware": {"type": "dict", "default": {"version": "15", "num_cpus": "2", "memory_mb": "2048", "num_cpu_cores_per_socket": "1", "hotadd_cpu": "False", "hotadd_memory": "False", "memory_reservation_lock": "False"}}, + "cloudinit_userdata": {"type": "list", "default": []}, + "disks": {"type": "list", "default": [{"boot": True, "size_gb": 16, "type": "thin"}]}, + "cdrom": {"type": "dict", "default": {"type": "client"}}, + "networks": {"type": "list", "default": [{"networkName": "VM Network", "virtualDev": "vmxnet3"}]}, + "customvalues": {"type": "list", "default": []}, + "wait": {"type": "bool", "default": True}, + "wait_timeout": {"type": "int", "default": 180} + } + + if not (len(sys.argv) > 1 and sys.argv[1] == "console"): + module = AnsibleModule(argument_spec=argument_spec, supports_check_mode=True, required_one_of=[['name', 'moid']]) + else: + # For testing without Ansible (e.g on Windows) + class cDummyAnsibleModule(): + ## Create blank VM + # params = { + # "hostname": "192.168.1.3", + # "username": "svc", + # "password": None, + # "name": "test-asdf", + # "moid": None, + # "template": None, + # "state": "present", + # "force": False, + # 
"datastore": "4tb-evo860-ssd", + # "annotation": "{'Name': 'test-asdf'}", + # "guest_id": "ubuntu-64", + # "hardware": {"version": "15", "num_cpus": "2", "memory_mb": "2048"}, + # "cloudinit_userdata": [], + # "disks": [{"boot": True, "size_gb": 16, "type": "thin"}, {"size_gb": 5, "type": "thin"}, {"size_gb": 2, "type": "thin"}], + # "cdrom": {"type": "iso", "iso_path": "/vmfs/volumes/4tb-evo860-ssd/ISOs/ubuntu-18.04.2-server-amd64.iso"}, + # "networks": [{"networkName": "VM Network", "virtualDev": "vmxnet3"}], + # "customvalues": [], + # "wait": True, + # "wait_timeout": 180, + # } + + ## Clone VM + params = { + "annotation": "{'lifecycle_state': 'current', 'Name': 'test-prod-sys-a0-1589979249', 'cluster_suffix': '1589979249', 'hosttype': 'sys', 'cluster_name': 'test-prod', 'env': 'prod', 'owner': 'dougal'}", + "cdrom": {"type": "client"}, + "cloudinit_userdata": [], + "customvalues": [], + "datastore": "4tb-evo860-ssd", + # "disks": [{"size_gb": 1, "type": "thin", "volname": "test"}], + "disks": [{"size_gb": 1, "type": "thin", "volname": "test_new"}, {"size_gb": 1, "type": "thin", "volname": "test_clone", "src": {"backing_filename": "[4tb-evo860-ssd] parsnip-dev-sys-a0-blue/parsnip-dev-sys-a0-blue--webdata.vmdk", "copy_or_move": "copy"}}], + "force": False, + "guest_id": "ubuntu-64", + "hardware": {"memory_mb": "2048", "num_cpus": "2", "version": "15"}, + "hostname": "192.168.1.3", + "moid": None, + "name": "gold-alpine-test1", + "networks": [{"cloudinit_netplan": {"ethernets": {"eth0": {"dhcp4": True}}}, "networkName": "VM Network", "virtualDev": "vmxnet3"}], + "password": sys.argv[2], + "state": "present", + "template": "gold-alpine", + "username": "svc", + "wait": True, + "wait_timeout": 180 + } + + ## Delete VM + # params = { + # "hostname": "192.168.1.3", + # "username": "svc", + # "password": None, + # "name": "test-asdf", + # "moid": None, + # "state": "absent" + # } + + def exit_json(self, changed, **kwargs): + print(changed, json.dumps(kwargs, sort_keys=True, indent=4, separators=(',', ': '))) + + def fail_json(self, msg): + print("Failed: " + msg) + exit(1) + + module = cDummyAnsibleModule() + + iScraper = esxiFreeScraper(hostname=module.params['hostname'], + username=module.params['username'], + password=module.params['password'], + name=module.params['name'], + moid=module.params['moid']) + + if iScraper.moid is None and iScraper.name is None: + module.fail_json(msg="If VM doesn't already exist, you must provide a name for it") + + # Check if the VM exists before continuing + if module.params['state'] == 'shutdownguest': + if iScraper.moid: + iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') + time_s = 60 + while time_s > 0: + (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/power.getstate " + str(iScraper.moid)) + if re.search('Powered off', stdout.read().decode('UTF-8')) is not None: + break + else: + time.sleep(1) + time_s = time_s - 1 + module.exit_json(changed=True, meta={"msg": "Shutdown " + iScraper.name + ": " + str(iScraper.moid)}) + else: + module.fail_json(msg="VM doesn't exist.") + + elif module.params['state'] == 'poweredon': + if iScraper.moid: + response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') + if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': + module.fail_json(msg="Failed to PowerOnVM_Task") + 
module.exit_json(changed=True, meta={"msg": "Powered-on " + iScraper.name + ": " + str(iScraper.moid)})
+        else:
+            module.fail_json(msg="VM doesn't exist.")
+
+    elif module.params['state'] == 'poweredoff':
+        if iScraper.moid:
+            response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '')
+            if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success':
+                module.fail_json(msg="Failed to PowerOffVM_Task")
+            module.exit_json(changed=True, meta={"msg": "Powered-off " + iScraper.name + ": " + str(iScraper.moid)})
+        else:
+            module.fail_json(msg="VM doesn't exist.")
+
+    elif module.params['state'] == 'absent':
+        if iScraper.moid:
+            # Turn off (ignoring failures), then destroy
+            response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '')
+            iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout']))
+
+            response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '')
+            if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success':
+                module.fail_json(msg="Failed to Destroy_Task")
+            module.exit_json(changed=True, meta={"msg": "Deleted " + iScraper.name + ": " + str(iScraper.moid)})
+        else:
+            module.exit_json(changed=False, meta={"msg": "VM " + iScraper.name + ": already absent."})
+
+    elif module.params['state'] == 'rebootguest':
+        if iScraper.moid:
+            (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/power.getstate " + str(iScraper.moid))
+            if re.search('Powered off', stdout.read().decode('UTF-8')) is not None:
+                response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '')
+                if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success':
+                    module.fail_json(msg="Failed to PowerOnVM_Task")
+            else:
+                response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '')
+                if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success':
+                    module.fail_json(msg="Failed to RebootGuest")
+            module.exit_json(changed=True, meta={"msg": "Rebooted " + iScraper.name + ": " + str(iScraper.moid)})
+        else:
+            module.fail_json(msg="VM doesn't exist.")
+
+    elif module.params['state'] == 'present':
+        exit_args = {}
+        # If the VM already exists, and the 'force' flag is set, then we delete it (and recreate it)
+        if iScraper.moid and module.params['force']:
+            # Turn off (ignoring failures), then destroy
+            response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '')
+            iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout']))
+
+            response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '')
+            if 
iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': + module.fail_json(msg="Failed to Destroy_Task") + iScraper.moid = None + + # If the VM doesn't exist, create it. + if iScraper.moid is None: + createVmResult = iScraper.create_vm(module.params['template'], module.params['annotation'], module.params['datastore'], module.params['hardware'], module.params['guest_id'], module.params['disks'], module.params['cdrom'], module.params['customvalues'], module.params['networks'], module.params['cloudinit_userdata']) + if createVmResult != None: + module.fail_json(msg="Failed to create_vm: %s" % createVmResult) + + response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') + waitresp = iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) + if waitresp != 'success': + module.fail_json(msg="Failed to PowerOnVM_Task: %s" % waitresp) + + isChanged = True + + ## Delete the cloud-init config + iScraper.delete_cloudinit() + + if "wait" in module.params and module.params['wait']: + time_s = int(module.params['wait_timeout']) + while time_s > 0: + (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/get.guest " + str(iScraper.moid)) + guest_info = stdout.read().decode('UTF-8') + vm_params = re.search('\s*hostName\s*=\s*\"?(?P.*?)\"?,.*\n\s*ipAddress\s*=\s*\"?(?P.*?)\"?,.*', guest_info) + if vm_params and vm_params.group('vm_ip') != "" and vm_params.group('vm_hostname') != "": + break + else: + time.sleep(1) + time_s = time_s - 1 + + module.exit_json(changed=isChanged, + guest_info=guest_info, + hostname=vm_params.group('vm_hostname'), + ip_address=vm_params.group('vm_ip'), + name=module.params['name'], + moid=iScraper.moid) + else: + module.exit_json(changed=isChanged, + hostname="", + ip_address="", + name=module.params['name'], + moid=iScraper.moid) + + else: + updateVmResult = iScraper.update_vm(annotation=module.params['annotation']) + if updateVmResult != None: + module.fail_json(msg=updateVmResult) + + module.exit_json(changed=True, name=module.params['name'], moid=iScraper.moid) + + else: + module.exit_json(changed=False, meta={"msg": "No state."}) + + +if __name__ == '__main__': + main() diff --git a/_dependencies/library/esxifree_guest_LICENSE b/_dependencies/library/esxifree_guest_LICENSE new file mode 100644 index 00000000..3c642ec5 --- /dev/null +++ b/_dependencies/library/esxifree_guest_LICENSE @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2020, Dougal Seeley +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/_dependencies/library/esxifree_guest_README.md b/_dependencies/library/esxifree_guest_README.md
new file mode 100644
index 00000000..9cdceaa8
--- /dev/null
+++ b/_dependencies/library/esxifree_guest_README.md
@@ -0,0 +1,53 @@
+# esxifree_guest
+https://github.com/dseeley/esxifree_guest
+
+This module can be used to create new ESXi virtual machines, including cloning from templates or other virtual machines.
+
+It does so using direct SOAP calls and Paramiko SSH to the host - without using the vSphere API - meaning it can be used on the free hypervisor.
+
+## Configuration
+Your ESXi host needs some config:
++ Enable SSH
+  + Inside the web UI, navigate to “Manage”, then the “Services” tab. Find the entry called: “TSM-SSH”, and enable it.
++ Enable “Guest IP Hack”
+  + `esxcli system settings advanced set -o /Net/GuestIPHack -i 1`
++ Open VNC Ports on the Firewall
+  ```
+  Packer connects to the VM using VNC, so we’ll open a range of ports to allow it to connect to it.
+
+  First, ensure we can edit the firewall configuration:
+
+  chmod 644 /etc/vmware/firewall/service.xml
+  chmod +t /etc/vmware/firewall/service.xml
+  Then append the range we want to open to the end of the file:
+
+  <service id="1000">
+    <id>packer-vnc</id>
+    <rule id="0000">
+      <direction>inbound</direction>
+      <protocol>tcp</protocol>
+      <porttype>dst</porttype>
+      <port>
+        <begin>5900</begin>
+        <end>6000</end>
+      </port>
+    </rule>
+    <enabled>true</enabled>
+    <required>true</required>
+  </service>
+  Finally, restore the permissions and reload the firewall:
+
+  chmod 444 /etc/vmware/firewall/service.xml
+  esxcli network firewall refresh
+  ```
+
+## Requirements
++ python 3
++ paramiko
++ Any base-images from which clones are to be made must have cloud-init and [`cloud-init-vmware-guestinfo`](https://github.com/vmware/cloud-init-vmware-guestinfo) installed
+
+## Execution
+This can be run as an Ansible module (see inline documentation), or from the console:
+```bash
+python3 ./esxifree_guest.py console
+```
\ No newline at end of file
From 5ac63407c294f139f69a07b6365b865eca4d685d Mon Sep 17 00:00:00 2001
From: Dougal Seeley
Date: Fri, 18 Sep 2020 09:40:09 +0100
Subject: [PATCH 25/58] Update EXAMPLE/ to reflect new ability to dynamically load clusterverse via galaxy.
--- EXAMPLE/cluster.yml | 22 ++++++++++++++++------ EXAMPLE/redeploy.yml | 10 ++++++++++ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/EXAMPLE/cluster.yml b/EXAMPLE/cluster.yml index 1cbe7eb1..6fdab2b0 100644 --- a/EXAMPLE/cluster.yml +++ b/EXAMPLE/cluster.yml @@ -1,27 +1,37 @@ --- +- name: Download required roles + hosts: localhost:all + connection: local + tasks: + - name: "ansible-galaxy install" + local_action: command ansible-galaxy install -r requirements.yml + delegate_to: localhost + run_once: true + tags: [always] + - name: Deploy the cluster hosts: localhost connection: local tasks: - - { import_role: { name: clusterverse/clean }, tags: [clusterverse_clean], when: clean is defined } # Alternative include_role (need to force the tags): - { include_role: { name: clusterverse/clean, apply: {tags: [clusterverse_clean]}}, tags: [clusterverse_clean], when: clean is defined } - - { import_role: { name: clusterverse/create }, tags: [clusterverse_create] } - - { import_role: { name: clusterverse/dynamic_inventory }, tags: [clusterverse_dynamic_inventory] } + - { include_role: { name: "clusterverse/clean", apply: {tags: &roletag_clean ["clusterverse_clean"]} }, tags: *roletag_clean, when: "clean is defined" } + - { include_role: { name: "clusterverse/create", apply: {tags: &roletag_create ["clusterverse_create"]} }, tags: *roletag_create } + - { include_role: { name: "clusterverse/dynamic_inventory", apply: {tags: &roletag_dynamic_inventory ["clusterverse_dynamic_inventory"]} }, tags: *roletag_dynamic_inventory } - name: Configure the cluster hosts: all tasks: - - { import_role: { name: clusterverse/config }, tags: [clusterverse_config] } + - { include_role: { name: "clusterverse/config", apply: {tags: &roletag_config ["clusterverse_config"]} }, tags: *roletag_config } ## Application roles - name: Application roles hosts: all tasks: - - { import_role: { name: "testrole" }, tags: [testrole] } + - { include_role: { name: "testrole", apply: {tags: &roletag_testrole ["testrole"]} }, tags: *roletag_testrole } ## - name: Perform cluster readiness operations hosts: localhost connection: local tasks: - - { import_role: { name: clusterverse/readiness }, tags: [clusterverse_readiness] } + - { include_role: { name: "clusterverse/readiness", apply: {tags: &roletag_readiness ["clusterverse_readiness"]} }, tags: *roletag_readiness } diff --git a/EXAMPLE/redeploy.yml b/EXAMPLE/redeploy.yml index 4fc6fac6..810213c8 100644 --- a/EXAMPLE/redeploy.yml +++ b/EXAMPLE/redeploy.yml @@ -1,5 +1,15 @@ --- +- name: Download required roles + hosts: localhost:all + connection: local + tasks: + - name: "ansible-galaxy install" + local_action: command ansible-galaxy install -r requirements.yml + delegate_to: localhost + run_once: true + tags: [always] + - name: Redeploy hosts: localhost connection: local From 4ff497a8a6166cdabe6d7bf9b9fc2dc8cc072d3f Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Fri, 18 Sep 2020 13:46:12 +0100 Subject: [PATCH 26/58] + Updates to allow clusterverse to be dynamically acquired via ansible-galaxy as part of the playbook. + Change the EXAMPLE/cluster.yml and redeploy.yml to use include_roles (dynamic load, because clusterverse won't exist yet) + Change cli_facts.py plugin from a callback to a vars plugin (callback_plugins are not evaluated in include_role) + Remove dependency on ansible facts for epoch time. + Simplify the package upgrade logic to use new 'reboot:' action + Make the inclusion of 'regionzone' in the inventory dependent on it existing for that cloud type. 
+ Add short delay to allow bind9 zone transfers to complete --- EXAMPLE/cluster.yml | 3 +++ EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml | 3 --- EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml | 5 +---- EXAMPLE/redeploy.yml | 2 +- redeploy/tasks/main.yml | 2 +- 5 files changed, 6 insertions(+), 9 deletions(-) diff --git a/EXAMPLE/cluster.yml b/EXAMPLE/cluster.yml index 6fdab2b0..25102511 100644 --- a/EXAMPLE/cluster.yml +++ b/EXAMPLE/cluster.yml @@ -13,7 +13,10 @@ - name: Deploy the cluster hosts: localhost connection: local + gather_facts: no tasks: + - { name: "Get dependent roles via ansible-galaxy", local_action: "command ansible-galaxy install -r requirements.yml", tags: ["always"] } + - { include_role: { name: "clusterverse/clean", apply: {tags: &roletag_clean ["clusterverse_clean"]} }, tags: *roletag_clean, when: "clean is defined" } - { include_role: { name: "clusterverse/create", apply: {tags: &roletag_create ["clusterverse_create"]} }, tags: *roletag_create } - { include_role: { name: "clusterverse/dynamic_inventory", apply: {tags: &roletag_dynamic_inventory ["clusterverse_dynamic_inventory"]} }, tags: *roletag_dynamic_inventory } diff --git a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml index 93c9bc10..e262bf7d 100644 --- a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml @@ -1,11 +1,8 @@ --- -redeploy_schemes_supported: ['_scheme_addallnew_rmdisk_rollback', '_scheme_addnewvm_rmdisk_rollback', '_scheme_rmvm_rmdisk_only'] - #redeploy_scheme: _scheme_addallnew_rmdisk_rollback #redeploy_scheme: _scheme_addnewvm_rmdisk_rollback #redeploy_scheme: _scheme_rmvm_rmdisk_only -#redeploy_scheme: _scheme_rmvm_keepdisk_only app_name: "test" # The name of the application cluster (e.g. 'couchbase', 'nginx'); becomes part of cluster_name. app_class: "test" # The class of application (e.g. 'database', 'webserver'); becomes part of the fqdn diff --git a/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml index eb10848f..4b08de80 100644 --- a/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml @@ -4,12 +4,9 @@ gcp_credentials_file: "{{ lookup('env','GCP_CREDENTIALS') | default('/dev/null', true) }}" gcp_credentials_json: "{{ lookup('file', gcp_credentials_file) | default({'project_id': 'GCP_CREDENTIALS__NOT_SET','client_email': 'GCP_CREDENTIALS__NOT_SET'}, true) }}" -redeploy_schemes_supported: ['_scheme_addallnew_rmdisk_rollback', '_scheme_addnewvm_rmdisk_rollback', '_scheme_rmvm_rmdisk_only'] - -redeploy_scheme: _scheme_addallnew_rmdisk_rollback +#redeploy_scheme: _scheme_addallnew_rmdisk_rollback #redeploy_scheme: _scheme_addnewvm_rmdisk_rollback #redeploy_scheme: _scheme_rmvm_rmdisk_only -#redeploy_scheme: _scheme_rmvm_keepdisk_only app_name: "test" # The name of the application cluster (e.g. 'couchbase', 'nginx'); becomes part of cluster_name. app_class: "test" # The class of application (e.g. 
'database', 'webserver'); becomes part of the fqdn diff --git a/EXAMPLE/redeploy.yml b/EXAMPLE/redeploy.yml index 810213c8..65ffac69 100644 --- a/EXAMPLE/redeploy.yml +++ b/EXAMPLE/redeploy.yml @@ -15,7 +15,7 @@ connection: local tasks: - name: "Get dependent roles via ansible-galaxy" - local_action: "command ansible-galaxy install -fr requirements.yml" + local_action: "command ansible-galaxy install -r requirements.yml" tags: ["always"] - name: Run redeploy diff --git a/redeploy/tasks/main.yml b/redeploy/tasks/main.yml index e4788e94..bb4f62ee 100644 --- a/redeploy/tasks/main.yml +++ b/redeploy/tasks/main.yml @@ -4,7 +4,7 @@ block: - assert: { that: "clean is not defined", msg: "Must not set the 'clean' variable for a redeploy" } - assert: { that: "canary is defined and (canary is defined and canary in ['start', 'finish', 'none', 'tidy', 'revert'])", msg: "Canary must be 'start', 'finish', 'none', 'tidy' or 'revert'" } - - assert: { that: "redeploy_scheme is defined and redeploy_scheme in redeploy_schemes_supported" } + - assert: { that: "redeploy_scheme is defined" } - assert: { that: "cluster_hosts_state | length", msg: "Redeploy only possible with an existing cluster." } - name: "Run the {{redeploy_scheme}} redploy scheme" From 6fa345fd7fe8c4cdbf2001d5f2c338d0665e911f Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Sun, 20 Sep 2020 07:52:23 +0100 Subject: [PATCH 27/58] Increase efficiency of _scheme_rmvm_keepdisk_only and add AWS support --- EXAMPLE/group_vars/_skel/cluster_vars.yml | 8 +- .../group_vars/test_aws_euw1/cluster_vars.yml | 8 +- _dependencies/tasks/main.yml | 3 + config/tasks/disks_auto.yml | 20 +-- create/tasks/aws.yml | 126 ++++++++++++------ .../tasks/main.yml | 2 +- .../tasks/main.yml | 2 +- .../tasks/_add_diskinfo_esxifree.yml | 36 ----- ..._diskinfo_to_cluster_hosts_target__aws.yml | 34 +++++ ...info_to_cluster_hosts_target__esxifree.yml | 37 +++++ .../tasks/by_hosttype.yml | 2 +- .../tasks/by_hosttype_by_host.yml | 49 +++---- .../_scheme_rmvm_keepdisk_only/tasks/main.yml | 14 +- 13 files changed, 207 insertions(+), 134 deletions(-) delete mode 100644 redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_diskinfo_esxifree.yml create mode 100644 redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml create mode 100644 redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml diff --git a/EXAMPLE/group_vars/_skel/cluster_vars.yml b/EXAMPLE/group_vars/_skel/cluster_vars.yml index 96bafb53..aab028aa 100644 --- a/EXAMPLE/group_vars/_skel/cluster_vars.yml +++ b/EXAMPLE/group_vars/_skel/cluster_vars.yml @@ -73,12 +73,12 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within # hosttype_vars: # sys: {vms_by_az: {a: 1, b: 1, c: 1}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} # # sysnobeats: {vms_by_az: {a: 1, b: 0, c: 0}, skip_beat_install:true, flavor: t3a.nano, version: "{{sysnobeats_version | default('')}}", auto_volumes: [] -# # sysdisks: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdb", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdc", mountpoint: "/var/log/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 3, ephemeral: False, encrypted: True, 
"delete_on_termination": true}, {"device_name": "/dev/sdd", mountpoint: "/var/log/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} -# # sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdb", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true }]} +# # sysdisks: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/var/log/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 3, ephemeral: False, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/var/log/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} +# # sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true }]} # # hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, auto_volumes: [], nvme: {volumes: [{mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}, {mountpoint: "/var/log/mysvc2", fstype: ext4, volume_size: 2500}]} } # # hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, auto_volumes: [], nvme: {volumes: [{mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}, {mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } } -# # hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} -# # hosthdd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: h1.2xlarge, auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} +# # hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} +# # hosthdd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: h1.2xlarge, auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} # aws_access_key: "" # aws_secret_key: "" # vpc_name: "test{{buildenv}}" diff --git a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml index e262bf7d..24c6ed15 100644 --- a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml @@ -76,12 +76,12 @@ cluster_vars: sandbox: 
hosttype_vars: sys: {vms_by_az: {a: 1, b: 1, c: 1}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} -# sysdisks: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdb", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdc", mountpoint: "/var/log/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 3, ephemeral: False, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdd", mountpoint: "/var/log/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} -# sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdb", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true }]} +# sysdisks: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/var/log/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 3, ephemeral: False, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/var/log/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} +# sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true }]} # hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, auto_volumes: [], nvme: {volumes: [{mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}, {mountpoint: "/var/log/mysvc2", fstype: ext4, volume_size: 2500}]} } # hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, auto_volumes: [], nvme: {volumes: [{mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}, {mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } } -# hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} -# hosthdd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: h1.2xlarge, auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} +# hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} +# hosthdd: 
{vms_by_az: {a: 1, b: 0, c: 0}, flavor: h1.2xlarge, auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} aws_access_key: "" aws_secret_key: "" vpc_name: "test{{buildenv}}" diff --git a/_dependencies/tasks/main.yml b/_dependencies/tasks/main.yml index 8f25ca87..8cf92f20 100644 --- a/_dependencies/tasks/main.yml +++ b/_dependencies/tasks/main.yml @@ -33,5 +33,8 @@ when: "'custom_tagslabels' in cluster_vars" - assert: { that: "'{%- for hosttype in cluster_vars[buildenv].hosttype_vars -%}{% if ('version' in cluster_vars[buildenv].hosttype_vars[hosttype]) and (not cluster_vars[buildenv].hosttype_vars[hosttype].version is regex('^[a-z\\d\\-_]{0,63}$')) %}{{cluster_vars[buildenv].hosttype_vars[hosttype].version}}{% endif %}{%- endfor -%}' == ''", fail_msg: "Please ensure cluster_vars[{{buildenv}}].hosttype_vars[hosttype].version is in the set [a-z\\d\\-_], and <63 characters long." } + - assert: { that: "cluster_vars[buildenv] | json_query(\"hosttype_vars.*.auto_volumes[] | [?contains(`/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde`, device_name)]\") | length == 0", msg: "/dev/sd[b-e] are not allowed as device_name in AWS cluster_vars[buildenv].hosttype_vars. Please start at /dev/sdf." } + when: cluster_vars.type == "aws" + - assert: { that: "(cluster_vars.assign_public_ip == 'yes' and cluster_vars.inventory_ip == 'public') or (cluster_vars.inventory_ip == 'private')", msg: "If inventory_ip=='public', 'assign_public_ip' must be 'yes'" } when: cluster_vars.type == "gcp" or cluster_vars.type == "aws" diff --git a/config/tasks/disks_auto.yml b/config/tasks/disks_auto.yml index ad3715c4..aefd1d5b 100644 --- a/config/tasks/disks_auto.yml +++ b/config/tasks/disks_auto.yml @@ -21,17 +21,17 @@ hostvols: | {% set res = [] -%} {% set tmp_blkvols = lsblk_volumes -%} - {% set inventory_hostname__no_suffix = inventory_hostname | regex_replace('-(?!.*-).*') -%} - {%- for autovol in cluster_hosts_target | to_json | from_json | json_query('[?starts_with(hostname, \'' + inventory_hostname__no_suffix + '\')].auto_volumes[]') -%} - {%- set blkvolloop = namespace(break=false) -%} - {%- for blkvol in tmp_blkvols if not blkvolloop.break -%} - {%- if (autovol.volume_size*1073741824|int) == (blkvol.size_b|int) -%} - {%- set _ = res.extend([ {'device': '/dev/'+blkvol.dev, 'mountpoint': autovol.mountpoint, 'fstype': autovol.fstype, 'perms': autovol.perms | default({})}]) -%} - {%- set _ = tmp_blkvols.remove(blkvol) -%} - {%- set blkvolloop.break = true -%} - {%- endif -%} - {%- endfor -%} + {% set inventory_hostname__no_suffix = inventory_hostname | regex_replace('-(?!.*-).*') -%} + {%- for autovol in cluster_hosts_target | to_json | from_json | json_query('[?starts_with(hostname, \'' + inventory_hostname__no_suffix + '\')].auto_volumes[]') -%} + {%- set blkvolloop = namespace(break=false) -%} + {%- for blkvol in tmp_blkvols if not blkvolloop.break -%} + {%- if (autovol.volume_size*1073741824|int) == (blkvol.size_b|int) -%} + {%- set _ = res.extend([ {'device': '/dev/'+blkvol.dev, 'mountpoint': autovol.mountpoint, 'fstype': autovol.fstype, 'perms': autovol.perms | default({})}]) -%} + {%- set _ = tmp_blkvols.remove(blkvol) -%} + {%- set blkvolloop.break = true -%} + {%- endif -%} {%- endfor -%} + {%- endfor -%} {{ res }} #- name: autodisks | hostvols diff --git a/create/tasks/aws.yml b/create/tasks/aws.yml index e779dc28..8300042a 100644 --- a/create/tasks/aws.yml +++ b/create/tasks/aws.yml @@ 
-20,6 +20,16 @@ - name: create/aws | Create EC2 VMs asynchronously and wait for completion block: + - name: create/aws | Detach volumes from previous instances (during the _scheme_rmvm_keepdisk_only redeploy, we only redeploy one host at a time, and it is already powered off) + ec2_vol: + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{cluster_vars.region}}" + id: "{{item.auto_volume.src.volume_id}}" + instance: None + loop: "{{ cluster_hosts_target_denormalised_by_volume }}" + when: "'src' in item.auto_volume" + - name: create/aws | Create EC2 VMs asynchronously ec2: aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" @@ -35,9 +45,7 @@ wait: yes instance_tags: "{{ _instance_tags | combine(cluster_vars.custom_tagslabels | default({})) }}" termination_protection: "{{cluster_vars[buildenv].termination_protection}}" - volumes: "{{ item.auto_volumes | default([]) }}" - count_tag: - Name: "{{item.hostname}}" + count_tag: { Name: "{{item.hostname}}" } exact_count: 1 vars: _instance_tags: @@ -51,56 +59,82 @@ maintenance_mode: "true" release: "{{ release_version }}" lifecycle_state: "current" - with_items: "{{cluster_hosts_target}}" + loop: "{{ cluster_hosts_target }}" async: 7200 poll: 0 - register: aws_instances + register: r__ec2 - name: create/aws | Wait for aws instance creation to complete - async_status: - jid: "{{ item.ansible_job_id }}" - register: aws_jobs - until: aws_jobs.finished + async_status: { jid: "{{ item.ansible_job_id }}" } + register: r__async_status__ec2 + until: r__async_status__ec2.finished delay: 3 retries: 300 - with_items: "{{aws_instances.results}}" + with_items: "{{r__ec2.results}}" -# - name: create/aws | aws_jobs.results -# debug: msg={{aws_jobs.results}} + - name: create/aws | r__async_status__ec2.results + debug: msg={{r__async_status__ec2.results}} - name: create/aws | Set a fact containing the newly-created hosts set_fact: - cluster_hosts_created: "{{ aws_jobs.results | json_query(\"[?changed==`true`].item.item\") }}" + cluster_hosts_created: "{{ r__async_status__ec2.results | json_query(\"[?changed==`true`].item.item\") }}" - - name: create/aws | Force set maintenance_mode to true (when prometheus_set_unset_maintenance_mode) - ec2_tag: + - name: create/aws | Create new volumes asynchronously (or attach existing if src is present, e.g. 
via the _scheme_rmvm_keepdisk_only scheme) + ec2_vol: aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" region: "{{cluster_vars.region}}" - resource: "{{ item }}" - tags: - maintenance_mode: "true" - with_items: "{{ aws_jobs.results | json_query('[].tagged_instances[0].id')}}" - when: (prometheus_set_unset_maintenance_mode is defined and prometheus_set_unset_maintenance_mode|bool) + instance: "{{ r__async_status__ec2.results | json_query(\"[].tagged_instances[?tags.Name==`\" + item.hostname + \"`].id[] | [0]\") | default(omit) }}" + id: "{{item.auto_volume.src.volume_id | default(omit)}}" + snapshot: "{{item.auto_volume.snapshot | default(omit)}}" + device_name: "{{item.auto_volume.device_name}}" + encrypted: "{{item.auto_volume.encrypted}}" + volume_size: "{%- if 'src' not in item.auto_volume -%}{{item.auto_volume.volume_size}}{%- endif -%}" + volume_type: "{{item.auto_volume.volume_type}}" + delete_on_termination: yes + loop: "{{ cluster_hosts_target_denormalised_by_volume }}" + async: 7200 + poll: 0 + register: r__ec2_vol + + - name: create/aws | Wait for volume creation/ attachment to complete + async_status: { jid: "{{ item.ansible_job_id }}" } + register: r__async_status__ec2_vol + until: r__async_status__ec2_vol.finished + delay: 3 + retries: 300 + with_items: "{{r__ec2_vol.results}}" + vars: + cluster_hosts_target_denormalised_by_volume: | + {% set res = [] -%} + {%- for cht_host in cluster_hosts_target -%} + {%- for autovol in cht_host.auto_volumes -%} + {%- set elem = {} -%} + {%- for cht_host_key in cht_host.keys() -%} + {%- if cht_host_key != 'auto_volumes' -%} + {%- set _ = elem.update({cht_host_key: cht_host[cht_host_key]}) -%} + {%- else -%} + {%- set _ = elem.update({'auto_volume': autovol}) -%} + {%- endif -%} + {%- endfor -%} + {%- set _ = res.append(elem) -%} + {%- endfor -%} + {%- endfor -%} + {{res}} - - name: create/aws | Extract EBS volume data so we can tag the disks - set_fact: - ebsdata: | - {% set res = [] -%} - {%- for host in aws_jobs.results -%} - {%- for devkey in host.tagged_instances[0].block_device_mapping.keys()-%} - {% set _dummy = res.extend([{ - 'hostname': host.tagged_instances[0].tags.Name, - 'ec2_id': host.tagged_instances[0].id, - 'device_name': devkey, - 'volume_id': host.tagged_instances[0].block_device_mapping[devkey].volume_id - }]) -%} - {%- endfor %} - {%- endfor %} - {{ res }} -# - name: create/aws | ebsdata -# debug: msg={{ebsdata}} +- name: create/aws | Tag the EBS volumes + block: + - name: create/aws | Get the ec2_instance_info for EBS tagging + ec2_instance_info: + filters: + "instance-state-name": ["running", "stopped"] + "tag:cluster_name": "{{cluster_name}}" + "tag:lifecycle_state": "current" + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{cluster_vars.region}}" + register: r__ec2_instance_info - name: create/aws | Set the ec2 volume name tag ec2_tag: @@ -108,6 +142,20 @@ aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" region: "{{cluster_vars.region}}" resource: "{{item.volume_id}}" - tags: + tags: "{{ _tags | combine(cluster_vars.custom_tagslabels | default({})) }}" + with_items: "{{_ec2_vols_denormalised_by_device}}" + vars: + _ec2_vols_denormalised_by_device: | + {% set res = [] -%} + {%- for host_instance in r__ec2_instance_info.instances -%} + {%- for block_device in host_instance.block_device_mappings -%} + {% set _ = res.append({'hostname': 
host_instance.tags.Name, 'hosttype': host_instance.tags.hosttype, 'device_name': block_device.device_name, 'volume_id': block_device.ebs.volume_id}) -%} + {%- endfor %} + {%- endfor %} + {{ res }} + _tags: Name: "{{ item.hostname }}--{{item.device_name | regex_replace('^.*\\/(.*)', '\\1')}}" - with_items: "{{ebsdata}}" + inv_node_version: "{{cluster_vars[buildenv].hosttype_vars[item.hosttype].version | default(omit)}}" + inv_node_type: "{{item.hosttype}}" + owner: "{{ lookup('env','USER') | lower }}" + release: "{{ release_version }}" diff --git a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml index 728a5618..29ee5ac6 100644 --- a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml +++ b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml @@ -5,7 +5,7 @@ - assert: { that: "non_current_hosts | length == 0", msg: "ERROR - All VMs must be in the 'current' lifecycle_state. Those not [{{non_current_hosts | join(',')}}]" } vars: non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" - when: canary=="start" or canary=="none" + when: (canary=="start" or canary=="none") and (ignore_lifecycle_check is not defined or ignore_lifecycle_check==false) - name: Redeploy by replacing entire cluster; rollback on fail diff --git a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml index 2a71a918..08f668a9 100644 --- a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml +++ b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml @@ -5,7 +5,7 @@ - assert: { that: "non_current_hosts | length == 0", msg: "ERROR - All VMs must be in the 'current' lifecycle_state. Those not [{{non_current_hosts | join(',')}}]" } vars: non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" - when: canary=="start" or canary=="none" + when: (canary=="start" or canary=="none") and (ignore_lifecycle_check is not defined or ignore_lifecycle_check==false) - name: Redeploy by hosttype; rollback on fail diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_diskinfo_esxifree.yml b/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_diskinfo_esxifree.yml deleted file mode 100644 index 40e7c103..00000000 --- a/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_diskinfo_esxifree.yml +++ /dev/null @@ -1,36 +0,0 @@ ---- - -- name: _get_diskinfo_esxifree | hosts_to_stop - debug: msg="{{hosts_to_stop}}" - -- name: _get_diskinfo_esxifree | vmware_guest_disk_info - vmware_guest_disk_info: - username: "{{ cluster_vars.username }}" - password: "{{ cluster_vars.password }}" - hostname: "{{ cluster_vars.esxi_ip }}" - datacenter: ha-datacenter - validate_certs: no - name: "{{item.name}}" - with_items: "{{hosts_to_stop}}" - register: r__vmware_guest_disk_info - -#- name: _get_diskinfo_esxifree | debug r__vmware_guest_disk_info -# debug: msg={{r__vmware_guest_disk_info}} - -- assert: { that: "r__vmware_guest_disk_info | json_query(\"results[].guest_disk_info.*[] | [?backing_datastore!='\" + cluster_vars.datastore + \"']\") | length == 0", msg: "Move is only possible if disks are on the same datastore." 
} - when: _scheme_rmvm_keepdisk_only__copy_or_move == "move" - -- name: _get_diskinfo_esxifree | augment cluster_host_redeploying's auto_volumes with source disk info - set_fact: - cluster_host_redeploying: | - {% set res = _cluster_host_redeploying_loopvar -%} - {%- for autovol in res.auto_volumes -%} - {%- for host_to_stop_diskinfo_result in r__vmware_guest_disk_info.results -%} - {%- if res.hostname | regex_replace('-(?!.*-).*') == host_to_stop_diskinfo_result.item.name | regex_replace('-(?!.*-).*') -%} - {%- for host_to_stop_diskinfo in host_to_stop_diskinfo_result.guest_disk_info | to_json | from_json | json_query('*| [?unit_number!=`0` && backing_type==\'FlatVer2\' && contains(backing_filename, \'--' + autovol.volname + '.vmdk\')]') -%} - {%- set _ = autovol.update({'volume_size': (host_to_stop_diskinfo.capacity_in_bytes/1073741824)|int, 'src': {'backing_filename': host_to_stop_diskinfo.backing_filename, 'copy_or_move': _scheme_rmvm_keepdisk_only__copy_or_move }}) -%} - {%- endfor -%} - {%- endif -%} - {%- endfor -%} - {%- endfor -%} - {{res}} diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml b/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml new file mode 100644 index 00000000..fda28843 --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml @@ -0,0 +1,34 @@ +--- + +- name: _get_diskinfo_aws | ec2_instance_info + ec2_instance_info: + filters: + "instance-state-name": ["running", "stopped"] + "tag:cluster_name": "{{cluster_name}}" + "tag:lifecycle_state": "retiring" + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{cluster_vars.region}}" + register: r__ec2_instance_info + +#- name: _get_diskinfo_aws | r__ec2_instance_info +# debug: msg={{r__ec2_instance_info}} + +- name: _get_diskinfo_aws | augment cluster_hosts_target auto_volumes with source disk info + set_fact: + cluster_hosts_target: | + {%- for cht_host in cluster_hosts_target -%} + {%- for cht_autovol in cht_host.auto_volumes -%} + {%- for chs_host_info_result in r__ec2_instance_info.instances -%} + {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == chs_host_info_result.tags.Name | regex_replace('-(?!.*-).*') -%} + {%- for chs_host_diskinfo in chs_host_info_result.block_device_mappings | selectattr('device_name', '==', cht_autovol.device_name) -%} + {%- set _ = cht_autovol.update({'src': {'instance_id': chs_host_info_result.instance_id, 'volume_id': chs_host_diskinfo.ebs.volume_id }}) -%} + {%- endfor -%} + {%- endif -%} + {%- endfor -%} + {%- endfor -%} + {%- endfor -%} + {{cluster_hosts_target}} + +- name: _get_diskinfo_aws | cluster_hosts_target + debug: msg={{cluster_hosts_target}} diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml b/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml new file mode 100644 index 00000000..ee89196e --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml @@ -0,0 +1,37 @@ +--- + +- name: _get_diskinfo_esxifree | vmware_guest_disk_info + vmware_guest_disk_info: + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + hostname: "{{ cluster_vars.esxi_ip }}" + datacenter: ha-datacenter + validate_certs: no + name: "{{item.name}}" + with_items: 
"{{cluster_hosts_state}}" + register: r__vmware_guest_disk_info + +#- name: _get_diskinfo_esxifree | r__vmware_guest_disk_info +# debug: msg={{r__vmware_guest_disk_info}} + +- assert: { that: "r__vmware_guest_disk_info | json_query(\"results[].guest_disk_info.*[] | [?backing_datastore!='\" + cluster_vars.datastore + \"']\") | length == 0", msg: "Move is only possible if disks are on the same datastore." } + when: _scheme_rmvm_keepdisk_only__copy_or_move == "move" + +- name: _get_diskinfo_esxifree | augment cluster_hosts_target auto_volumes with source disk info + set_fact: + cluster_hosts_target: | + {%- for cht_host in cluster_hosts_target -%} + {%- for cht_autovol in cht_host.auto_volumes -%} + {%- for chs_host_info_result in r__vmware_guest_disk_info.results -%} + {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == chs_host_info_result.item.name | regex_replace('-(?!.*-).*') -%} + {%- for chs_host_diskinfo in chs_host_info_result.guest_disk_info | to_json | from_json | json_query('*| [?unit_number!=`0` && backing_type==\'FlatVer2\' && contains(backing_filename, \'--' + cht_autovol.volname + '.vmdk\')]') -%} + {%- set _ = cht_autovol.update({'volume_size': (chs_host_diskinfo.capacity_in_bytes/1073741824)|int, 'src': {'backing_filename': chs_host_diskinfo.backing_filename, 'copy_or_move': _scheme_rmvm_keepdisk_only__copy_or_move }}) -%} + {%- endfor -%} + {%- endif -%} + {%- endfor -%} + {%- endfor -%} + {%- endfor -%} + {{cluster_hosts_target}} + +#- name: _get_diskinfo_esxifree | cluster_hosts_target +# debug: msg={{cluster_hosts_target}} diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype.yml b/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype.yml index 1bebc776..6cc2b25d 100644 --- a/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype.yml +++ b/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype.yml @@ -20,4 +20,4 @@ include_tasks: by_hosttype_by_host.yml with_items: "{{ hosts_to_redeploy }}" loop_control: - loop_var: _cluster_host_redeploying_loopvar + loop_var: host_to_redeploy diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype_by_host.yml b/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype_by_host.yml index 14276ef3..12d27ac5 100644 --- a/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype_by_host.yml +++ b/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype_by_host.yml @@ -1,51 +1,34 @@ --- -- debug: msg="Attempting to redeploy {{_cluster_host_redeploying_loopvar.hostname}}" +- debug: msg="by_hosttype_by_host | Attempting to redeploy {{host_to_redeploy.hostname}}" -- name: by_hosttype_by_host | stop/ remove previous instance - block: - - name: by_hosttype_by_host | run predeleterole role - include_role: - name: "{{predeleterole}}" - when: predeleterole is defined and predeleterole != "" - - - name: by_hosttype_by_host | Power off old VM - include_role: - name: clusterverse/redeploy/__common - tasks_from: poweroff_vms.yml - - - name: by_hosttype_by_host | re-acquire the dynamic inventory - include_role: - name: clusterverse/dynamic_inventory - - - name: by_hosttype_by_host | re-acquire cluster_hosts_target and cluster_hosts_state - import_role: - name: clusterverse/cluster_hosts +- name: by_hosttype_by_host | run predeleterole role + include_role: + name: "{{predeleterole}}" + when: predeleterole is defined and predeleterole != "" - - name: by_hosttype_by_host | create cluster_host_redeploying with the disk info from hosts_to_stop - include_role: - name: "{{role_path}}" - tasks_from: "_add_diskinfo_{{cluster_vars.type}}.yml" +- 
name: by_hosttype_by_host | Power off old VM + include_role: + name: clusterverse/redeploy/__common + tasks_from: poweroff_vms.yml vars: - _root_cluster_host_redeploying: "{{_cluster_host_redeploying_loopvar.hostname | regex_replace('-(?!.*-).*')}}" #Remove the cluster_suffix from the hostname - hosts_to_stop: "{{ cluster_hosts_state | to_json | from_json | json_query(\"[?tagslabels.lifecycle_state=='retiring' && starts_with(name, '\" + _root_cluster_host_redeploying + \"')]\") }}" - -- name: by_hosttype_by_host | cluster_host_redeploying - debug: msg={{cluster_host_redeploying}} + _host_to_redeploy_nosuffix: "{{host_to_redeploy.hostname | regex_replace('-(?!.*-).*')}}" #Remove the cluster_suffix from the hostname + hosts_to_stop: "{{ cluster_hosts_state | to_json | from_json | json_query(\"[?tagslabels.lifecycle_state=='retiring' && starts_with(name, '\" + _host_to_redeploy_nosuffix + \"')]\") }}" -- name: "by_hosttype_by_host | Run {{mainclusteryml}} to add {{cluster_host_redeploying.hostname}} to cluster" - shell: "{{ (argv | join(' ')) | regex_replace('redeploy.yml', mainclusteryml) }} -e cluster_suffix={{cluster_suffix}} -e '{'cluster_hosts_target': [{{cluster_host_redeploying | to_json}}]}'" +- name: "by_hosttype_by_host | Run {{mainclusteryml}} to add {{host_to_redeploy.hostname}} to cluster" + shell: "{{ (argv | join(' ')) | regex_replace('redeploy.yml', mainclusteryml) }} -e cluster_suffix={{cluster_suffix}} -e '{'cluster_hosts_target': [{{host_to_redeploy | to_json}}]}'" register: r__mainclusteryml no_log: True ignore_errors: yes - debug: msg="{{[r__mainclusteryml.stdout_lines] + [r__mainclusteryml.stderr_lines]}}" failed_when: r__mainclusteryml is failed -# when: r__mainclusteryml is failed or (debug_nested_log_output is defined and debug_nested_log_output|bool) + when: r__mainclusteryml is failed or (debug_nested_log_output is defined and debug_nested_log_output|bool) - name: by_hosttype_by_host | re-acquire the dynamic inventory include_role: name: clusterverse/dynamic_inventory -- name: by_hosttype_by_host | re-acquire cluster_hosts_target and cluster_hosts_state +- name: by_hosttype_by_host | re-acquire cluster_hosts_state (NOT cluster_hosts_target, as we are augmenting this in _add_src_diskinfo_to_cluster_hosts_target__ (also, it is not affected by change of state)) import_role: name: clusterverse/cluster_hosts + tasks_from: get_cluster_hosts_state.yml diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml b/redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml index 67aeba86..61680183 100644 --- a/redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml +++ b/redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml @@ -3,17 +3,18 @@ - name: Preflight check block: - assert: { that: "_scheme_rmvm_keepdisk_only__copy_or_move is defined and _scheme_rmvm_keepdisk_only__copy_or_move in ['copy', 'move']", msg: "ERROR - _scheme_rmvm_keepdisk_only__copy_or_move must be defined and set to either 'copy' or 'move'" } + when: cluster_vars.type == "esxifree" - assert: { that: "non_current_hosts | length == 0", msg: "ERROR - All VMs must be in the 'current' lifecycle_state. 
Those not [{{non_current_hosts | join(',')}}]" } vars: non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" - when: canary=="start" or canary=="none" + when: (canary=="start" or canary=="none") and (ignore_lifecycle_check is not defined or ignore_lifecycle_check==false) - assert: - that: "{{chs_hosts | difference(chf_hosts) | length==0}}" - msg: "Cannot use this scheme to redeploy to smaller cluster; [{{ chs_hosts | join(',') }}] > [{{ chf_hosts | join(',') }}]" + that: "{{chs_hosts | difference(cht_hosts) | length==0}}" + msg: "Cannot use this scheme to redeploy to smaller cluster; [{{ chs_hosts | join(',') }}] > [{{ cht_hosts | join(',') }}]" vars: - chf_hosts: "{{ cluster_hosts_target | json_query(\"[].hostname\") | map('regex_replace', '-(?!.*-).*') | list }}" + cht_hosts: "{{ cluster_hosts_target | json_query(\"[].hostname\") | map('regex_replace', '-(?!.*-).*') | list }}" chs_hosts: "{{ cluster_hosts_state | json_query(\"[].name\") | map('regex_replace', '-(?!.*-).*') | list }}" - name: Redeploy setup @@ -36,6 +37,9 @@ when: cluster_suffix is defined when: (canary=="start" or canary=="none") +- name: Add the disk info from previous instances to cluster_hosts_target + include_tasks: "_add_src_diskinfo_to_cluster_hosts_target__{{cluster_vars.type}}.yml" + - name: Run redeploy per hosttype. Create one at a time, then stop previous. include_tasks: by_hosttype.yml with_items: "{{ myhosttypes_array }}" @@ -58,7 +62,7 @@ - include_role: name: clusterverse/clean - tasks_from: "clean_vms_{{cluster_vars.type}}.yml" + tasks_from: clean_vms.yml when: (hosts_to_clean | length) - debug: From 71e2fbdb6d7867e9e93d36c5f9f150ba3843f588 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Wed, 23 Sep 2020 13:09:39 +0100 Subject: [PATCH 28/58] Add rollback to _scheme_rmvm_keepdisk_only and rename _scheme_rmvm_keepdisk_rollback --- .../group_vars/test_aws_euw1/cluster_vars.yml | 3 + .../group_vars/test_gcp_euw1/cluster_vars.yml | 1 + _dependencies/vars_plugins/cli_facts.py | 2 +- create/tasks/aws.yml | 8 +- .../tasks/set_lifecycle_state_label.yml | 1 + .../_scheme_rmvm_keepdisk_only/tasks/main.yml | 73 --------- ..._diskinfo_to_cluster_hosts_target__aws.yml | 3 +- ...info_to_cluster_hosts_target__esxifree.yml | 0 .../tasks/by_hosttype.yml | 0 .../tasks/by_hosttype_by_host.yml | 14 +- .../tasks/main.yml | 142 ++++++++++++++++++ 11 files changed, 166 insertions(+), 81 deletions(-) delete mode 100644 redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml rename redeploy/{_scheme_rmvm_keepdisk_only => _scheme_rmvm_keepdisk_rollback}/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml (95%) rename redeploy/{_scheme_rmvm_keepdisk_only => _scheme_rmvm_keepdisk_rollback}/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml (100%) rename redeploy/{_scheme_rmvm_keepdisk_only => _scheme_rmvm_keepdisk_rollback}/tasks/by_hosttype.yml (100%) rename redeploy/{_scheme_rmvm_keepdisk_only => _scheme_rmvm_keepdisk_rollback}/tasks/by_hosttype_by_host.yml (67%) create mode 100644 redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml diff --git a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml index 24c6ed15..d3141a27 100644 --- a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml @@ -1,8 +1,11 @@ --- +redeploy_schemes_supported: ['_scheme_addallnew_rmdisk_rollback', '_scheme_addnewvm_rmdisk_rollback', '_scheme_rmvm_rmdisk_only', 
'_scheme_rmvm_keepdisk_rollback'] + #redeploy_scheme: _scheme_addallnew_rmdisk_rollback #redeploy_scheme: _scheme_addnewvm_rmdisk_rollback #redeploy_scheme: _scheme_rmvm_rmdisk_only +#redeploy_scheme: _scheme_rmvm_keepdisk_rollback app_name: "test" # The name of the application cluster (e.g. 'couchbase', 'nginx'); becomes part of cluster_name. app_class: "test" # The class of application (e.g. 'database', 'webserver'); becomes part of the fqdn diff --git a/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml index 4b08de80..6ee9dcd1 100644 --- a/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml @@ -7,6 +7,7 @@ gcp_credentials_json: "{{ lookup('file', gcp_credentials_file) | default({'proje #redeploy_scheme: _scheme_addallnew_rmdisk_rollback #redeploy_scheme: _scheme_addnewvm_rmdisk_rollback #redeploy_scheme: _scheme_rmvm_rmdisk_only +#redeploy_scheme: _scheme_rmvm_keepdisk_rollback app_name: "test" # The name of the application cluster (e.g. 'couchbase', 'nginx'); becomes part of cluster_name. app_class: "test" # The class of application (e.g. 'database', 'webserver'); becomes part of the fqdn diff --git a/_dependencies/vars_plugins/cli_facts.py b/_dependencies/vars_plugins/cli_facts.py index 5cd07ccf..ea113fa7 100644 --- a/_dependencies/vars_plugins/cli_facts.py +++ b/_dependencies/vars_plugins/cli_facts.py @@ -7,7 +7,7 @@ DOCUMENTATION = ''' --- -cars: cli_facts +vars: argv, cliargs short_description: Expose the system ARGV and CLI arguments as facts in plays. version_added: "2.8" author: "Dougal Seeley" diff --git a/create/tasks/aws.yml b/create/tasks/aws.yml index 8300042a..af0ec685 100644 --- a/create/tasks/aws.yml +++ b/create/tasks/aws.yml @@ -20,7 +20,7 @@ - name: create/aws | Create EC2 VMs asynchronously and wait for completion block: - - name: create/aws | Detach volumes from previous instances (during the _scheme_rmvm_keepdisk_only redeploy, we only redeploy one host at a time, and it is already powered off) + - name: create/aws | Detach volumes from previous instances (during the _scheme_rmvm_keepdisk_rollback redeploy, we only redeploy one host at a time, and it is already powered off) ec2_vol: aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" @@ -72,14 +72,14 @@ retries: 300 with_items: "{{r__ec2.results}}" - - name: create/aws | r__async_status__ec2.results - debug: msg={{r__async_status__ec2.results}} +# - name: create/aws | r__async_status__ec2.results +# debug: msg={{r__async_status__ec2.results}} - name: create/aws | Set a fact containing the newly-created hosts set_fact: cluster_hosts_created: "{{ r__async_status__ec2.results | json_query(\"[?changed==`true`].item.item\") }}" - - name: create/aws | Create new volumes asynchronously (or attach existing if src is present, e.g. via the _scheme_rmvm_keepdisk_only scheme) + - name: create/aws | Create new volumes asynchronously (or attach existing if src is present, e.g. 
via the _scheme_rmvm_keepdisk_rollback scheme) ec2_vol: aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" diff --git a/redeploy/__common/tasks/set_lifecycle_state_label.yml b/redeploy/__common/tasks/set_lifecycle_state_label.yml index 93c582be..093a50be 100644 --- a/redeploy/__common/tasks/set_lifecycle_state_label.yml +++ b/redeploy/__common/tasks/set_lifecycle_state_label.yml @@ -1,4 +1,5 @@ --- + - name: set_lifecycle_state_label | hosts_to_relabel debug: msg="{{hosts_to_relabel}}" diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml b/redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml deleted file mode 100644 index 61680183..00000000 --- a/redeploy/_scheme_rmvm_keepdisk_only/tasks/main.yml +++ /dev/null @@ -1,73 +0,0 @@ ---- - -- name: Preflight check - block: - - assert: { that: "_scheme_rmvm_keepdisk_only__copy_or_move is defined and _scheme_rmvm_keepdisk_only__copy_or_move in ['copy', 'move']", msg: "ERROR - _scheme_rmvm_keepdisk_only__copy_or_move must be defined and set to either 'copy' or 'move'" } - when: cluster_vars.type == "esxifree" - - - assert: { that: "non_current_hosts | length == 0", msg: "ERROR - All VMs must be in the 'current' lifecycle_state. Those not [{{non_current_hosts | join(',')}}]" } - vars: - non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" - when: (canary=="start" or canary=="none") and (ignore_lifecycle_check is not defined or ignore_lifecycle_check==false) - - - assert: - that: "{{chs_hosts | difference(cht_hosts) | length==0}}" - msg: "Cannot use this scheme to redeploy to smaller cluster; [{{ chs_hosts | join(',') }}] > [{{ cht_hosts | join(',') }}]" - vars: - cht_hosts: "{{ cluster_hosts_target | json_query(\"[].hostname\") | map('regex_replace', '-(?!.*-).*') | list }}" - chs_hosts: "{{ cluster_hosts_state | json_query(\"[].name\") | map('regex_replace', '-(?!.*-).*') | list }}" - -- name: Redeploy setup - block: - - name: Change lifecycle_state label from 'current' to 'retiring' - include_role: - name: clusterverse/redeploy/__common - tasks_from: set_lifecycle_state_label.yml - vars: - hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current']\") }}" - new_state: "retiring" - when: ('retiring' not in (cluster_hosts_state | map(attribute='tagslabels.lifecycle_state'))) - - - name: re-acquire cluster_hosts_target and cluster_hosts_state - include_role: - name: clusterverse/cluster_hosts - public: yes - - - assert: { that: "cluster_hosts_state | json_query(\"[?tagslabels.cluster_suffix == '\"+ cluster_suffix +\"']\") | length == 0", msg: "Please ensure cluster_suffix ({{cluster_suffix}}) is not already set on the cluster" } - when: cluster_suffix is defined - when: (canary=="start" or canary=="none") - -- name: Add the disk info from previous instances to cluster_hosts_target - include_tasks: "_add_src_diskinfo_to_cluster_hosts_target__{{cluster_vars.type}}.yml" - -- name: Run redeploy per hosttype. Create one at a time, then stop previous. 
- include_tasks: by_hosttype.yml - with_items: "{{ myhosttypes_array }}" - loop_control: - loop_var: hosttype - vars: - cluster_hosts_target_by_hosttype: "{{cluster_hosts_target | dict_agg('hosttype')}}" - myhosttypes_array: "{%- if myhosttypes is defined -%} {{ myhosttypes.split(',') }} {%- else -%} {{ cluster_hosts_target_by_hosttype.keys() | list }} {%- endif -%}" - when: canary!="tidy" - - -- name: "Tidy up powered-down, non-current instances. NOTE: Must do clean_dns first, because both clean_dns and clean_vms have the cluster_hosts role as a dependency, which when run after clean_vms, will be empty." - block: - - assert: { that: "'current' in (cluster_hosts_state | map(attribute='tagslabels.lifecycle_state'))", msg: "ERROR - Cannot tidy when there are no machines in the 'current' lifecycle_state. Please use '-e clean=_all_'." } - - - include_role: - name: clusterverse/clean - tasks_from: clean_dns.yml - when: (hosts_to_clean | length) and (cluster_vars.dns_server is defined and cluster_vars.dns_server != "") and (cluster_vars.dns_user_domain is defined and cluster_vars.dns_user_domain != "") - - - include_role: - name: clusterverse/clean - tasks_from: clean_vms.yml - when: (hosts_to_clean | length) - - - debug: - msg: "tidy | No hosts to tidy. Only powered-down, non-current machines with be tidied; to clean other machines, please use the '-e clean=' extra variable." - when: hosts_to_clean | length == 0 - vars: - hosts_to_clean: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current' && !(contains('RUNNING,running', instance_state))]\") }}" - when: canary=="tidy" or ((canary=="none" or canary=="finish") and canary_tidy_on_success is defined and canary_tidy_on_success|bool) diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml similarity index 95% rename from redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml rename to redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml index fda28843..20bde44d 100644 --- a/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml @@ -5,7 +5,6 @@ filters: "instance-state-name": ["running", "stopped"] "tag:cluster_name": "{{cluster_name}}" - "tag:lifecycle_state": "retiring" aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" region: "{{cluster_vars.region}}" @@ -19,7 +18,7 @@ cluster_hosts_target: | {%- for cht_host in cluster_hosts_target -%} {%- for cht_autovol in cht_host.auto_volumes -%} - {%- for chs_host_info_result in r__ec2_instance_info.instances -%} + {%- for chs_host_info_result in r__ec2_instance_info.instances | selectattr('tags.lifecycle_state', '!=', 'current') -%} {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == chs_host_info_result.tags.Name | regex_replace('-(?!.*-).*') -%} {%- for chs_host_diskinfo in chs_host_info_result.block_device_mappings | selectattr('device_name', '==', cht_autovol.device_name) -%} {%- set _ = cht_autovol.update({'src': {'instance_id': chs_host_info_result.instance_id, 'volume_id': chs_host_diskinfo.ebs.volume_id }}) -%} diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml 
b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml similarity index 100% rename from redeploy/_scheme_rmvm_keepdisk_only/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml rename to redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype.yml similarity index 100% rename from redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype.yml rename to redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype.yml diff --git a/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype_by_host.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml similarity index 67% rename from redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype_by_host.yml rename to redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml index 12d27ac5..bcc20497 100644 --- a/redeploy/_scheme_rmvm_keepdisk_only/tasks/by_hosttype_by_host.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml @@ -13,7 +13,7 @@ tasks_from: poweroff_vms.yml vars: _host_to_redeploy_nosuffix: "{{host_to_redeploy.hostname | regex_replace('-(?!.*-).*')}}" #Remove the cluster_suffix from the hostname - hosts_to_stop: "{{ cluster_hosts_state | to_json | from_json | json_query(\"[?tagslabels.lifecycle_state=='retiring' && starts_with(name, '\" + _host_to_redeploy_nosuffix + \"')]\") }}" + hosts_to_stop: "{{ cluster_hosts_state | to_json | from_json | json_query(\"[?tagslabels.lifecycle_state!='current' && starts_with(name, '\" + _host_to_redeploy_nosuffix + \"')]\") }}" - name: "by_hosttype_by_host | Run {{mainclusteryml}} to add {{host_to_redeploy.hostname}} to cluster" shell: "{{ (argv | join(' ')) | regex_replace('redeploy.yml', mainclusteryml) }} -e cluster_suffix={{cluster_suffix}} -e '{'cluster_hosts_target': [{{host_to_redeploy | to_json}}]}'" @@ -24,6 +24,18 @@ failed_when: r__mainclusteryml is failed when: r__mainclusteryml is failed or (debug_nested_log_output is defined and debug_nested_log_output|bool) +- name: by_hosttype_by_host | re-acquire cluster_hosts_state (NOT cluster_hosts_target, as we are augmenting this in _add_src_diskinfo_to_cluster_hosts_target__ (also, it is not affected by change of state)) + import_role: + name: clusterverse/cluster_hosts + tasks_from: get_cluster_hosts_state.yml + +- name: by_hosttype_by_host | Power on new VM (not needed for normal redeploy, but for rescue case) + include_role: + name: clusterverse/redeploy/__common + tasks_from: poweron_vms.yml + vars: + hosts_to_start: "{{ cluster_hosts_state | selectattr('name', '==', host_to_redeploy.hostname) | list }}" + - name: by_hosttype_by_host | re-acquire the dynamic inventory include_role: name: clusterverse/dynamic_inventory diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml new file mode 100644 index 00000000..328599e7 --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml @@ -0,0 +1,142 @@ +--- + +- name: Preflight check + block: + - block: + - name: Preflight check | ec2_instance_info + ec2_instance_info: + filters: { "instance-state-name": [ "running", "stopped" ], "tag:cluster_name": "{{cluster_name}}", "tag:lifecycle_state": "current" } + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: 
"{{cluster_vars.region}}" + register: r__ec2_instance_info + + - assert: { that: "_invalid_disks | length == 0", msg: "EBS disks with a device_name of /dev/sd[b-e] cannot be reattached to a new instance (an AWS limitation) [found on: {{ _invalid_disks | join(',')}}]. To replace these, you must use a redeploy scheme that copies the disks." } + vars: { _invalid_disks: "{{ r__ec2_instance_info.instances | json_query(\"[?block_device_mappings[?contains(`/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde`, device_name)]].tags.Name\") }}" } + when: cluster_vars.type == "aws" + + - assert: { that: "_scheme_rmvm_keepdisk_only__copy_or_move is defined and _scheme_rmvm_keepdisk_only__copy_or_move in ['copy', 'move']", msg: "ERROR - _scheme_rmvm_keepdisk_only__copy_or_move must be defined and set to either 'copy' or 'move'" } + when: cluster_vars.type == "esxifree" + + - assert: { that: "non_current_hosts | length == 0", msg: "ERROR - All VMs must be in the 'current' lifecycle_state. Those not [{{non_current_hosts | join(',')}}]" } + vars: + non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" + when: (canary=="start" or canary=="none") and (ignore_lifecycle_check is not defined or ignore_lifecycle_check==false) + + - assert: + that: "{{chs_hosts | difference(cht_hosts) | length==0}}" + msg: "Cannot use this scheme to redeploy to smaller cluster; [{{ chs_hosts | join(',') }}] > [{{ cht_hosts | join(',') }}]" + vars: + cht_hosts: "{{ cluster_hosts_target | json_query(\"[].hostname\") | map('regex_replace', '-(?!.*-).*') | list }}" + chs_hosts: "{{ cluster_hosts_state | json_query(\"[].name\") | map('regex_replace', '-(?!.*-).*') | list }}" + +- name: Redeploy by hosttype; rollback on fail + block: + - name: Redeploy setup + block: + - name: Change lifecycle_state label from 'current' to 'retiring' + include_role: + name: clusterverse/redeploy/__common + tasks_from: set_lifecycle_state_label.yml + vars: + hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current']\") }}" + new_state: "retiring" + when: ('retiring' not in (cluster_hosts_state | map(attribute='tagslabels.lifecycle_state'))) + + - name: re-acquire cluster_hosts_target and cluster_hosts_state + include_role: + name: clusterverse/cluster_hosts + public: yes + + - assert: { that: "cluster_hosts_state | json_query(\"[?tagslabels.cluster_suffix == '\"+ cluster_suffix +\"']\") | length == 0", msg: "Please ensure cluster_suffix ({{cluster_suffix}}) is not already set on the cluster" } + when: cluster_suffix is defined + when: (canary=="start" or canary=="none") + + - name: Add the disk info from previous instances to cluster_hosts_target + include_tasks: "_add_src_diskinfo_to_cluster_hosts_target__{{cluster_vars.type}}.yml" + + - name: Run redeploy per hosttype. Create one at a time, then stop previous. + include_tasks: by_hosttype.yml + with_items: "{{ myhosttypes_array }}" + loop_control: + loop_var: hosttype + vars: + cluster_hosts_target_by_hosttype: "{{cluster_hosts_target | dict_agg('hosttype')}}" + myhosttypes_array: "{%- if myhosttypes is defined -%} {{ myhosttypes.split(',') }} {%- else -%} {{ cluster_hosts_target_by_hosttype.keys() | list }} {%- endif -%}" + + - fail: + when: testfail is defined and testfail == "fail_1" + + - name: re-acquire cluster_hosts_target and cluster_hosts_state (For the '-e canary=tidy' option. 
This can't be run in the tidy block below because that block depends on this info being correct) + import_role: + name: clusterverse/cluster_hosts + when: (canary_tidy_on_success is defined and canary_tidy_on_success|bool) + + rescue: + - debug: msg="Rescuing" + + - name: rescue | re-acquire cluster_hosts_target and cluster_hosts_state + import_role: + name: clusterverse/cluster_hosts + + - name: rescue | Change lifecycle_state label from 'current' to 'redeployfail' + include_role: + name: clusterverse/redeploy/__common + tasks_from: set_lifecycle_state_label.yml + vars: + hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current']\") }}" + new_state: "redeployfail" + + - name: rescue | Change lifecycle_state label from 'retiring' to 'current' + include_role: + name: clusterverse/redeploy/__common + tasks_from: set_lifecycle_state_label.yml + vars: + hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}" + new_state: "current" + + - name: rescue | re-acquire cluster_hosts_target and cluster_hosts_state + import_role: + name: clusterverse/cluster_hosts + + - name: rescue | Add the disk info from previous instances to cluster_hosts_target + include_tasks: "_add_src_diskinfo_to_cluster_hosts_target__{{cluster_vars.type}}.yml" + + - name: rescue | explicitly specify only the relevant cluster.yml roles to run for rescuing + set_fact: + argv: "{{argv + ['--tags'] + ['clusterverse_create,clusterverse_dynamic_inventory,clusterverse_readiness'] }}" + + - name: rescue | Run redeploy per hosttype. Create one at a time, then stop previous. + include_tasks: by_hosttype.yml + with_items: "{{ myhosttypes_array }}" + loop_control: + loop_var: hosttype + vars: + cluster_hosts_target_by_hosttype: "{{cluster_hosts_target | dict_agg('hosttype')}}" + myhosttypes_array: "{%- if myhosttypes is defined -%} {{ myhosttypes.split(',') }} {%- else -%} {{ cluster_hosts_target_by_hosttype.keys() | list }} {%- endif -%}" + + - name: rescue | end_play to prevent tidying of pre-rescued VMs + meta: end_play + when: canary!="tidy" + + +- name: "Tidy up powered-down, non-current instances. NOTE: Must do clean_dns first, because both clean_dns and clean_vms have the cluster_hosts role as a dependency, which when run after clean_vms, will be empty." + block: + - assert: { that: "'current' in (cluster_hosts_state | map(attribute='tagslabels.lifecycle_state'))", msg: "ERROR - Cannot tidy when there are no machines in the 'current' lifecycle_state. Please use '-e clean=_all_'." } + + - include_role: + name: clusterverse/clean + tasks_from: clean_dns.yml + when: (hosts_to_clean | length) and (cluster_vars.dns_server is defined and cluster_vars.dns_server != "") and (cluster_vars.dns_user_domain is defined and cluster_vars.dns_user_domain != "") + + - include_role: + name: clusterverse/clean + tasks_from: clean_vms.yml + when: (hosts_to_clean | length) + + - debug: + msg: "tidy | No hosts to tidy. Only powered-down, non-current machines with be tidied; to clean other machines, please use the '-e clean=' extra variable." 
+ when: hosts_to_clean | length == 0 + vars: + hosts_to_clean: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current' && !(contains('RUNNING,running', instance_state))]\") }}" + when: canary=="tidy" or ((canary=="none" or canary=="finish") and canary_tidy_on_success is defined and canary_tidy_on_success|bool) From ca8ffd185b456277dfaf06c253e004df5cd22fa5 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Mon, 28 Sep 2020 13:00:26 +0100 Subject: [PATCH 29/58] Update to support rollback --- README.md | 13 +- _dependencies/library/esxifree_guest.py | 199 ++++++++++-------- create/tasks/aws.yml | 17 -- create/tasks/esxifree.yml | 4 +- create/tasks/main.yml | 18 ++ redeploy/__common/tasks/poweroff_vms.yml | 3 +- redeploy/__common/tasks/poweron_vms.yml | 5 +- .../tasks/set_lifecycle_state_label.yml | 2 +- 8 files changed, 150 insertions(+), 111 deletions(-) diff --git a/README.md b/README.md index f6a00e5f..33b6c03e 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,7 @@ The role is designed to run in two modes: + For each node in the cluster: + Run `predeleterole` + Delete the node - + Run the main cluster.yml, which forces the missing node to be redeployed. Run with the same parameters as for the main playbook. + + Run the main cluster.yml (with the same parameters as for the main playbook), which forces the missing node to be redeployed (the `cluster_suffix` remains the same). + If the process fails at any point: + No further VMs will be deleted or rebuilt - the playbook stops. + **_scheme_addnewvm_rmdisk_rollback** @@ -130,3 +130,14 @@ The role is designed to run in two modes: + The old VMs are stopped. + If the process fails for any reason, the old VMs are reinstated, and the new VMs stopped (rollback) + To delete the old VMs, either set '-e canary_tidy_on_success=true', or call redeploy.yml with '-e canary=tidy' + + **_scheme_rmvm_keepdisk_rollback** + + _Cluster topology must remain identical_ + + **It assumes a resilient deployment (it can tolerate one node being removed from the cluster).** + + For each node in the cluster: + + Run `predeleterole` + + Stop the node + + Detach the disks from the old node + + Run the main cluster.yml to create a new node + + Attach disks to new node + + If the process fails for any reason, the old VMs are reinstated (and the disks reattached to the old nodes), and the new VMs stopped (rollback) + + To delete the old VMs, either set '-e canary_tidy_on_success=true', or call redeploy.yml with '-e canary=tidy' diff --git a/_dependencies/library/esxifree_guest.py b/_dependencies/library/esxifree_guest.py index 481dbbf9..5523a089 100644 --- a/_dependencies/library/esxifree_guest.py +++ b/_dependencies/library/esxifree_guest.py @@ -331,6 +331,7 @@ sample: None ''' +import os import time import re import json @@ -756,37 +757,6 @@ def create_vm(self, vmTemplate=None, annotation=None, datastore=None, hardware=N if len(bootDisks) == 1: return ("Boot disk parameters defined for cloned VM. 
Ambiguous requirement - not supported.") - newDisks = [newDisk for newDisk in disks if 'boot' not in newDisk] - for newDiskCount,newDisk in enumerate(newDisks): - scsiDiskIdx = newDiskCount + diskCount - disk_filename = self.name + "--" + newDisk['volname'] + ".vmdk" - - #Check if new disk already exists - if so, exit - try: - (stdin, stdout, stderr) = self.esxiCnx.exec_command("stat " + vmPathDest + "/" + disk_filename) - except IOError as e: - if 'src' in newDisk and newDisk['src'] is not None: - cloneSrcBackingFile = re.search('^\[(?P.*?)\] *(?P.*\/(?P(?P.*?)(?:--(?P.*?))?\.vmdk))$', newDisk['src']['backing_filename']) - try: - (stdin, stdout, stderr) = self.esxiCnx.exec_command("stat /vmfs/volumes/" + cloneSrcBackingFile.group('datastore') + "/" + cloneSrcBackingFile.group('fulldiskpath')) - except IOError as e: - return (cloneSrcBackingFile.group('fulldiskpath') + " not found!\n" + str(e)) - else: - if newDisk['src']['copy_or_move'] == 'copy': - self.esxiCnx.exec_command("vmkfstools -i /vmfs/volumes/" + cloneSrcBackingFile.group('datastore') + "/" + cloneSrcBackingFile.group('fulldiskpath') + " -d thin " + vmPathDest + "/" + disk_filename) - else: - self.esxiCnx.exec_command("vmkfstools -E /vmfs/volumes/" + cloneSrcBackingFile.group('datastore') + "/" + cloneSrcBackingFile.group('fulldiskpath') + " " + vmPathDest + "/" + disk_filename) - - else: - (stdin, stdout, stderr) = self.esxiCnx.exec_command("vmkfstools -c " + str(newDisk['size_gb']) + "G -d " + newDisk['type'] + " " + vmPathDest + "/" + disk_filename) - - vmxDict.update({"scsi0:" + str(scsiDiskIdx) + ".devicetype": "scsi-hardDisk"}) - vmxDict.update({"scsi0:" + str(scsiDiskIdx) + ".present": "TRUE"}) - vmxDict.update({"scsi0:" + str(scsiDiskIdx) + ".filename": disk_filename}) - diskCount = diskCount + 1 - else: - return (disk_filename + " already present!") - # write the vmx self.put_vmx(vmxDict, vmPathDest + "/" + self.name + ".vmx") @@ -794,7 +764,11 @@ def create_vm(self, vmTemplate=None, annotation=None, datastore=None, hardware=N (stdin, stdout, stderr) = self.esxiCnx.exec_command("vim-cmd solo/registervm " + vmPathDest + "/" + self.name + ".vmx") self.moid = int(stdout.readlines()[0]) - def update_vm(self, annotation=None): + # The logic used to update the disks is the same for an existing as a new VM. + self.update_vm(annotation=None, disks=disks) + + def update_vm(self, annotation, disks): + vmxPath, vmxDict = self.get_vmx(self.moid) if annotation: # Update the config (annotation) in the running VM response, cookies = self.soap_client.send_req('<_this type="VirtualMachine">' + str(self.moid) + '' + annotation + '') @@ -803,9 +777,46 @@ def update_vm(self, annotation=None): return ("Failed to ReconfigVM_Task: %s" % waitresp) # Now update the disk (should not be necessary, but for some reason, sometimes the ReconfigVM_Task does not flush config to disk). 
- vmxPath, vmxDict = self.get_vmx(self.moid) vmxDict.update({"annotation": annotation}) - self.put_vmx(vmxDict, vmxPath) + + if disks: + curDisks = [{"filename": vmxDict[scsiDisk], "volname": re.sub(r".*--([\w\d]+)\.vmdk", r"\1", vmxDict[scsiDisk])} for scsiDisk in sorted(vmxDict) if re.match(r"scsi0:\d\.filename", scsiDisk)] + curDisksCount = len(curDisks) + newDisks = [newDisk for newDisk in disks if ('boot' not in newDisk or newDisk['boot'] == False)] + for newDiskCount,newDisk in enumerate(newDisks): + scsiDiskIdx = newDiskCount + curDisksCount + disk_filename = self.name + "--" + newDisk['volname'] + ".vmdk" + + #Don't clone already existing disks + try: + (stdin, stdout, stderr) = self.esxiCnx.exec_command("stat " + os.path.dirname(vmxPath) + "/" + disk_filename) + except IOError as e: + if 'src' in newDisk and newDisk['src'] is not None: + cloneSrcBackingFile = re.search('^\[(?P.*?)\] *(?P.*\/(?P(?P.*?)(?:--(?P.*?))?\.vmdk))$', newDisk['src']['backing_filename']) + try: + (stdin, stdout, stderr) = self.esxiCnx.exec_command("stat /vmfs/volumes/" + cloneSrcBackingFile.group('datastore') + "/" + cloneSrcBackingFile.group('fulldiskpath')) + except IOError as e: + return (cloneSrcBackingFile.group('fulldiskpath') + " not found!\n" + str(e)) + else: + if newDisk['src']['copy_or_move'] == 'copy': + self.esxiCnx.exec_command("vmkfstools -i /vmfs/volumes/" + cloneSrcBackingFile.group('datastore') + "/" + cloneSrcBackingFile.group('fulldiskpath') + " -d thin " + os.path.dirname(vmxPath) + "/" + disk_filename) + else: + self.esxiCnx.exec_command("vmkfstools -E /vmfs/volumes/" + cloneSrcBackingFile.group('datastore') + "/" + cloneSrcBackingFile.group('fulldiskpath') + " " + os.path.dirname(vmxPath) + "/" + disk_filename) + + else: + (stdin, stdout, stderr) = self.esxiCnx.exec_command("vmkfstools -c " + str(newDisk['size_gb']) + "G -d " + newDisk['type'] + " " + os.path.dirname(vmxPath) + "/" + disk_filename) + + # if this is a new disk, not a restatement of an existing disk: + if len(curDisks) >= newDiskCount+2 and curDisks[newDiskCount+1]['volname'] == newDisk['volname']: + pass + else: + vmxDict.update({"scsi0:" + str(scsiDiskIdx) + ".devicetype": "scsi-hardDisk"}) + vmxDict.update({"scsi0:" + str(scsiDiskIdx) + ".present": "TRUE"}) + vmxDict.update({"scsi0:" + str(scsiDiskIdx) + ".filename": disk_filename}) + curDisksCount = curDisksCount + 1 + + self.put_vmx(vmxDict, vmxPath) + self.esxiCnx.exec_command("vim-cmd vmsvc/reload " + str(self.moid)) # def update_vm_pyvmomi(self, annotation=None): # if annotation: @@ -845,7 +856,7 @@ def main(): "name": {"type": "str"}, "moid": {"type": "str"}, "template": {"type": "str"}, - "state": {"type": "str", "default": 'present', "choices": ['absent', 'present', 'rebootguest', 'poweredon', 'poweredoff', 'shutdownguest']}, + "state": {"type": "str", "default": 'present', "choices": ['absent', 'present', 'unchanged', 'rebootguest', 'poweredon', 'poweredoff', 'shutdownguest']}, "force": {"type": "bool", "default": False}, "datastore": {"type": "str"}, "annotation": {"type": "str", "default": ""}, @@ -890,23 +901,24 @@ class cDummyAnsibleModule(): ## Clone VM params = { - "annotation": "{'lifecycle_state': 'current', 'Name': 'test-prod-sys-a0-1589979249', 'cluster_suffix': '1589979249', 'hosttype': 'sys', 'cluster_name': 'test-prod', 'env': 'prod', 'owner': 'dougal'}", + "annotation": None, + # "annotation": "{'lifecycle_state': 'current', 'Name': 'test-prod-sys-a0-1589979249', 'cluster_suffix': '1589979249', 'hosttype': 'sys', 'cluster_name': 'test-prod', 
'env': 'prod', 'owner': 'dougal'}", "cdrom": {"type": "client"}, "cloudinit_userdata": [], "customvalues": [], "datastore": "4tb-evo860-ssd", # "disks": [{"size_gb": 1, "type": "thin", "volname": "test"}], - "disks": [{"size_gb": 1, "type": "thin", "volname": "test_new"}, {"size_gb": 1, "type": "thin", "volname": "test_clone", "src": {"backing_filename": "[4tb-evo860-ssd] parsnip-dev-sys-a0-blue/parsnip-dev-sys-a0-blue--webdata.vmdk", "copy_or_move": "copy"}}], + "disks": [{"size_gb": 1, "type": "thin", "volname": "test", "src": {"backing_filename": "[4tb-evo860-ssd] testdisks-dev-sys-a0-1601204786/testdisks-dev-sys-a0-1601204786--test.vmdk", "copy_or_move": "move"}}], "force": False, "guest_id": "ubuntu-64", "hardware": {"memory_mb": "2048", "num_cpus": "2", "version": "15"}, "hostname": "192.168.1.3", "moid": None, - "name": "gold-alpine-test1", + "name": "testdisks-dev-sys-a0-1601205102", "networks": [{"cloudinit_netplan": {"ethernets": {"eth0": {"dhcp4": True}}}, "networkName": "VM Network", "virtualDev": "vmxnet3"}], "password": sys.argv[2], "state": "present", - "template": "gold-alpine", + "template": "gold-ubuntu2004-20200912150257", "username": "svc", "wait": True, "wait_timeout": 180 @@ -941,7 +953,16 @@ def fail_json(self, msg): module.fail_json(msg="If VM doesn't already exist, you must provide a name for it") # Check if the VM exists before continuing - if module.params['state'] == 'shutdownguest': + if module.params['state'] == 'unchanged': + if iScraper.moid is not None: + updateVmResult = iScraper.update_vm(annotation=module.params['annotation'], disks=module.params['disks']) + if updateVmResult != None: + module.fail_json(msg=updateVmResult) + module.exit_json(changed=True, meta={"msg": "Shutdown " + iScraper.name + ": " + str(iScraper.moid)}) + else: + module.fail_json(msg="VM doesn't exist.") + + elif module.params['state'] == 'shutdownguest': if iScraper.moid: iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') time_s = 60 @@ -958,19 +979,27 @@ def fail_json(self, msg): elif module.params['state'] == 'poweredon': if iScraper.moid: - response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') - if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': - module.fail_json(msg="Failed to PowerOnVM_Task") - module.exit_json(changed=True, meta={"msg": "Powered-on " + iScraper.name + ": " + str(iScraper.moid)}) + (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/power.getstate " + str(iScraper.moid)) + if re.search('Powered off', stdout.read().decode('UTF-8')) is not None: + response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') + if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': + module.fail_json(msg="Failed to PowerOnVM_Task") + module.exit_json(changed=True, meta={"msg": "Powered-on " + iScraper.name + ": " + str(iScraper.moid)}) + else: + module.exit_json(changed=False, meta={"msg": "VM " + iScraper.name + ": already on."}) else: module.fail_json(msg="VM doesn't exist.") elif module.params['state'] == 'poweredoff': if iScraper.moid: - response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') - if 
iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': - module.fail_json(msg="Failed to PowerOffVM_Task") - module.exit_json(changed=True, meta={"msg": "Powered-off " + iScraper.name + ": " + str(iScraper.moid)}) + (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/power.getstate " + str(iScraper.moid)) + if re.search('Powered on', stdout.read().decode('UTF-8')) is not None: + response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') + if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': + module.fail_json(msg="Failed to PowerOffVM_Task") + module.exit_json(changed=True, meta={"msg": "Powered-off " + iScraper.name + ": " + str(iScraper.moid)}) + else: + module.exit_json(changed=False, meta={"msg": "VM " + iScraper.name + ": already off."}) else: module.fail_json(msg="VM doesn't exist.") @@ -991,11 +1020,11 @@ def fail_json(self, msg): if iScraper.moid: (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/power.getstate " + str(iScraper.moid)) if re.search('Powered off', stdout.read().decode('UTF-8')) is not None: - response, cookies = iScraper.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') + response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': module.fail_json(msg="Failed to PowerOnVM_Task") else: - response, cookies = iScraper.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') + response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': module.fail_json(msg="Failed to RebootGuest") module.exit_json(changed=True, meta={"msg": "Rebooted " + iScraper.name + ": " + str(iScraper.moid)}) @@ -1021,47 +1050,47 @@ def fail_json(self, msg): if createVmResult != None: module.fail_json(msg="Failed to create_vm: %s" % createVmResult) + else: + updateVmResult = iScraper.update_vm(annotation=module.params['annotation'], disks=module.params['disks']) + if updateVmResult != None: + module.fail_json(msg=updateVmResult) + + (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/power.getstate " + str(iScraper.moid)) + if re.search('Powered off', stdout.read().decode('UTF-8')) is not None: response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') - waitresp = iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) - if waitresp != 'success': - module.fail_json(msg="Failed to PowerOnVM_Task: %s" % waitresp) + if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': + module.fail_json(msg="Failed to PowerOnVM_Task") - isChanged = True + isChanged = 
True - ## Delete the cloud-init config - iScraper.delete_cloudinit() + ## Delete the cloud-init config + iScraper.delete_cloudinit() - if "wait" in module.params and module.params['wait']: - time_s = int(module.params['wait_timeout']) - while time_s > 0: - (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/get.guest " + str(iScraper.moid)) - guest_info = stdout.read().decode('UTF-8') - vm_params = re.search('\s*hostName\s*=\s*\"?(?P.*?)\"?,.*\n\s*ipAddress\s*=\s*\"?(?P.*?)\"?,.*', guest_info) - if vm_params and vm_params.group('vm_ip') != "" and vm_params.group('vm_hostname') != "": - break - else: - time.sleep(1) - time_s = time_s - 1 - - module.exit_json(changed=isChanged, - guest_info=guest_info, - hostname=vm_params.group('vm_hostname'), - ip_address=vm_params.group('vm_ip'), - name=module.params['name'], - moid=iScraper.moid) - else: - module.exit_json(changed=isChanged, - hostname="", - ip_address="", - name=module.params['name'], - moid=iScraper.moid) + ## Wait for IP address and hostname to be advertised by the VM (via open-vm-tools) + if "wait" in module.params and module.params['wait']: + time_s = int(module.params['wait_timeout']) + while time_s > 0: + (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/get.guest " + str(iScraper.moid)) + guest_info = stdout.read().decode('UTF-8') + vm_params = re.search('\s*hostName\s*=\s*\"?(?P.*?)\"?,.*\n\s*ipAddress\s*=\s*\"?(?P.*?)\"?,.*', guest_info) + if vm_params and vm_params.group('vm_ip') != "" and vm_params.group('vm_hostname') != "": + break + else: + time.sleep(1) + time_s = time_s - 1 + module.exit_json(changed=isChanged, + guest_info=guest_info, + hostname=vm_params.group('vm_hostname'), + ip_address=vm_params.group('vm_ip'), + name=module.params['name'], + moid=iScraper.moid) else: - updateVmResult = iScraper.update_vm(annotation=module.params['annotation']) - if updateVmResult != None: - module.fail_json(msg=updateVmResult) - - module.exit_json(changed=True, name=module.params['name'], moid=iScraper.moid) + module.exit_json(changed=isChanged, + hostname="", + ip_address="", + name=module.params['name'], + moid=iScraper.moid) else: module.exit_json(changed=False, meta={"msg": "No state."}) diff --git a/create/tasks/aws.yml b/create/tasks/aws.yml index af0ec685..65192a18 100644 --- a/create/tasks/aws.yml +++ b/create/tasks/aws.yml @@ -104,23 +104,6 @@ delay: 3 retries: 300 with_items: "{{r__ec2_vol.results}}" - vars: - cluster_hosts_target_denormalised_by_volume: | - {% set res = [] -%} - {%- for cht_host in cluster_hosts_target -%} - {%- for autovol in cht_host.auto_volumes -%} - {%- set elem = {} -%} - {%- for cht_host_key in cht_host.keys() -%} - {%- if cht_host_key != 'auto_volumes' -%} - {%- set _ = elem.update({cht_host_key: cht_host[cht_host_key]}) -%} - {%- else -%} - {%- set _ = elem.update({'auto_volume': autovol}) -%} - {%- endif -%} - {%- endfor -%} - {%- set _ = res.append(elem) -%} - {%- endfor -%} - {%- endfor -%} - {{res}} - name: create/aws | Tag the EBS volumes diff --git a/create/tasks/esxifree.yml b/create/tasks/esxifree.yml index 4b6f2f4e..ecf54c89 100644 --- a/create/tasks/esxifree.yml +++ b/create/tasks/esxifree.yml @@ -1,6 +1,6 @@ --- -- name: Create vmware instances from template +- name: create/esxifree | Create vmware instances from template esxifree_guest: hostname: "{{ cluster_vars.esxi_ip }}" username: "{{ cluster_vars.username }}" @@ -28,7 +28,7 @@ async: 7200 poll: 0 -- name: Wait for instance creation to complete +- name: create/esxifree | Wait for instance 
creation to complete async_status: jid: "{{ item.ansible_job_id }}" register: esxi_jobs diff --git a/create/tasks/main.yml b/create/tasks/main.yml index d6e95706..5af875dc 100644 --- a/create/tasks/main.yml +++ b/create/tasks/main.yml @@ -23,3 +23,21 @@ - name: "Create {{cluster_vars.type}} cluster" include_tasks: "{{cluster_vars.type}}.yml" + vars: + # auto_volumes are normally a list of volumes per host. We cannot iterate this within a non-nested ansible loop(with_items), so we denormalise/ flatten it into a new one-dimensional list, which has each volume, as well as all the parent host information. + cluster_hosts_target_denormalised_by_volume: | + {% set res = [] -%} + {%- for cht_host in cluster_hosts_target -%} + {%- for autovol in cht_host.auto_volumes -%} + {%- set elem = {} -%} + {%- for cht_host_key in cht_host.keys() -%} + {%- if cht_host_key != 'auto_volumes' -%} + {%- set _ = elem.update({cht_host_key: cht_host[cht_host_key]}) -%} + {%- else -%} + {%- set _ = elem.update({'auto_volume': autovol}) -%} + {%- endif -%} + {%- endfor -%} + {%- set _ = res.append(elem) -%} + {%- endfor -%} + {%- endfor -%} + {{res}} diff --git a/redeploy/__common/tasks/poweroff_vms.yml b/redeploy/__common/tasks/poweroff_vms.yml index 38e06772..3b8fccf0 100644 --- a/redeploy/__common/tasks/poweroff_vms.yml +++ b/redeploy/__common/tasks/poweroff_vms.yml @@ -64,7 +64,7 @@ username: "{{ cluster_vars.username }}" password: "{{ cluster_vars.password }}" name: "{{item.name}}" - state: present + state: unchanged annotation: "{{ item.tagslabels | combine({'maintenance_mode': 'true'}) }}" with_items: "{{ hosts_to_stop }}" @@ -76,6 +76,5 @@ name: "{{item.name}}" state: shutdownguest with_items: "{{ hosts_to_stop }}" - when: cluster_vars.type == "esxifree" when: hosts_to_stop | length \ No newline at end of file diff --git a/redeploy/__common/tasks/poweron_vms.yml b/redeploy/__common/tasks/poweron_vms.yml index 363a9aee..da950fff 100644 --- a/redeploy/__common/tasks/poweron_vms.yml +++ b/redeploy/__common/tasks/poweron_vms.yml @@ -18,7 +18,6 @@ run_once: true when: cluster_vars.type == "aws" - - name: poweron_vms | Power-on GCP GCE VM(s) asynchronously block: - name: poweron_vms | Power-on GCP GCE VM(s) @@ -45,7 +44,6 @@ with_items: "{{r__gcp_compute_instance.results}}" when: cluster_vars.type == "gcp" - - name: poweron_vms | Power-on esxifree VM(s) block: - name: poweron_vms | Power-on esxifree VM(s) @@ -54,6 +52,7 @@ username: "{{ cluster_vars.username }}" password: "{{ cluster_vars.password }}" name: "{{item.name}}" - state: powered-on + state: poweredon + with_items: "{{ hosts_to_start }}" when: cluster_vars.type == "esxifree" when: hosts_to_start | length \ No newline at end of file diff --git a/redeploy/__common/tasks/set_lifecycle_state_label.yml b/redeploy/__common/tasks/set_lifecycle_state_label.yml index 093a50be..33b97b34 100644 --- a/redeploy/__common/tasks/set_lifecycle_state_label.yml +++ b/redeploy/__common/tasks/set_lifecycle_state_label.yml @@ -36,7 +36,7 @@ username: "{{ cluster_vars.username }}" password: "{{ cluster_vars.password }}" name: "{{item.name}}" - state: present + state: "unchanged" annotation: "{{ item.tagslabels | combine({'lifecycle_state': new_state}) }}" with_items: "{{ hosts_to_relabel }}" when: cluster_vars.type == "esxifree" From 44c37adbb26cac856bfafc563dacb76f94f83faa Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Thu, 1 Oct 2020 18:42:08 +0100 Subject: [PATCH 30/58] Extra protection for redeploy/_scheme_rmvm_keepdisk_rollback when disks have changed. 
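In brief, the extra protection is a new preflight assertion (preflight.yml, added below): the disks attached to the existing 'current' VMs must be the same as, or a subset of, the auto_volumes now declared in cluster_vars, and the cluster may not be resized. A minimal sketch of the idea, simplified for illustration only (the real check in preflight.yml below compares disks per host and excludes the root device; the variable names here are illustrative, not part of the patch):

- assert:
    that: "existing_device_names is subset(target_device_names)"
    fail_msg: "Disks on the existing VMs differ from the auto_volumes in cluster_vars - cannot redeploy with _scheme_rmvm_keepdisk_rollback"
  vars:
    # Device names requested in cluster_vars (via cluster_hosts_target)
    target_device_names: "{{ cluster_hosts_target | json_query('[].auto_volumes[].device_name') }}"
    # Device names currently attached (via ec2_instance_info); the real check also strips the root volume
    existing_device_names: "{{ r__ec2_instance_info.instances | json_query('[].block_device_mappings[].device_name') }}"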
--- _dependencies/tasks/main.yml | 22 +++++----- create/tasks/main.yml | 2 +- .../tasks/by_hosttype_by_host.yml | 22 +++++----- .../tasks/main.yml | 31 +------------- .../tasks/preflight.yml | 40 +++++++++++++++++++ 5 files changed, 67 insertions(+), 50 deletions(-) create mode 100644 redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml diff --git a/_dependencies/tasks/main.yml b/_dependencies/tasks/main.yml index 8cf92f20..ad6b4d29 100644 --- a/_dependencies/tasks/main.yml +++ b/_dependencies/tasks/main.yml @@ -18,23 +18,23 @@ - name: Preflight check block: - - assert: { that: "ansible_version.full is version_compare('2.9', '>=')", msg: "Ansible >=2.9 required." } - - assert: { that: "app_name is defined and app_name != ''", msg: "Please define app_name" } - - assert: { that: "app_class is defined and app_class != ''", msg: "Please define app_class" } - - assert: { that: "clusterid is defined and cluster_vars is defined", msg: "Please define clusterid" } - - assert: { that: "buildenv is defined and cluster_vars[buildenv] is defined", msg: "Please define buildenv" } + - assert: { that: "ansible_version.full is version_compare('2.9', '>=')", fail_msg: "Ansible >=2.9 required." } + - assert: { that: "app_name is defined and app_name != ''", fail_msg: "Please define app_name" } + - assert: { that: "app_class is defined and app_class != ''", fail_msg: "Please define app_class" } + - assert: { that: "clusterid is defined and cluster_vars is defined", fail_msg: "Please define clusterid" } + - assert: { that: "buildenv is defined and cluster_vars[buildenv] is defined", fail_msg: "Please define buildenv" } ## Tags/ labels must be compatible with GCP and AWS - check everything that goes into a label. - - assert: { that: "release_version is regex('^[a-z\\d\\-_]{0,63}$')", msg: "Please ensure release_version ({{release_version}}) is in the set [a-z\\d\\-_], and <63 characters long." } + - assert: { that: "release_version is regex('^[a-z\\d\\-_]{0,63}$')", fail_msg: "Please ensure release_version ({{release_version}}) is in the set [a-z\\d\\-_], and <63 characters long." } when: release_version is defined - - assert: { that: "cluster_suffix is regex('^[a-z\\d\\-_]{0,63}$')", msg: "Please ensure cluster_suffix ({{cluster_suffix}}) is in the set[a-z\\d\\-_], and <63 characters long." } + - assert: { that: "cluster_suffix is regex('^[a-z\\d\\-_]{0,63}$')", fail_msg: "Please ensure cluster_suffix ({{cluster_suffix}}) is in the set[a-z\\d\\-_], and <63 characters long." } when: cluster_suffix is defined - assert: { that: "'{%- for label in cluster_vars.custom_tagslabels -%}{% if not cluster_vars.custom_tagslabels[label] is regex('^[a-z\\d\\-_]{0,63}$') %}{{label}}: {{cluster_vars.custom_tagslabels[label]}}{% endif %}{%- endfor -%}' == ''", fail_msg: "Please ensure all cluster_vars.custom_tagslabels are in the set [a-z\\d\\-_], and <63 characters long." } when: "'custom_tagslabels' in cluster_vars" - assert: { that: "'{%- for hosttype in cluster_vars[buildenv].hosttype_vars -%}{% if ('version' in cluster_vars[buildenv].hosttype_vars[hosttype]) and (not cluster_vars[buildenv].hosttype_vars[hosttype].version is regex('^[a-z\\d\\-_]{0,63}$')) %}{{cluster_vars[buildenv].hosttype_vars[hosttype].version}}{% endif %}{%- endfor -%}' == ''", fail_msg: "Please ensure cluster_vars[{{buildenv}}].hosttype_vars[hosttype].version is in the set [a-z\\d\\-_], and <63 characters long." 
} - - assert: { that: "cluster_vars[buildenv] | json_query(\"hosttype_vars.*.auto_volumes[] | [?contains(`/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde`, device_name)]\") | length == 0", msg: "/dev/sd[b-e] are not allowed as device_name in AWS cluster_vars[buildenv].hosttype_vars. Please start at /dev/sdf." } - when: cluster_vars.type == "aws" - - - assert: { that: "(cluster_vars.assign_public_ip == 'yes' and cluster_vars.inventory_ip == 'public') or (cluster_vars.inventory_ip == 'private')", msg: "If inventory_ip=='public', 'assign_public_ip' must be 'yes'" } + - assert: { that: "(cluster_vars.assign_public_ip == 'yes' and cluster_vars.inventory_ip == 'public') or (cluster_vars.inventory_ip == 'private')", fail_msg: "If inventory_ip=='public', 'assign_public_ip' must be 'yes'" } when: cluster_vars.type == "gcp" or cluster_vars.type == "aws" + + - assert: { that: "cluster_vars[buildenv] | json_query(\"hosttype_vars.*.auto_volumes[] | [?contains(`/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde`, device_name)]\") | length == 0", fail_msg: "/dev/sd[b-e] are not allowed as device_name in AWS cluster_vars[buildenv].hosttype_vars. Please start at /dev/sdf." } + when: cluster_vars.type == "aws" diff --git a/create/tasks/main.yml b/create/tasks/main.yml index 5af875dc..dfbb3db4 100644 --- a/create/tasks/main.yml +++ b/create/tasks/main.yml @@ -24,7 +24,7 @@ - name: "Create {{cluster_vars.type}} cluster" include_tasks: "{{cluster_vars.type}}.yml" vars: - # auto_volumes are normally a list of volumes per host. We cannot iterate this within a non-nested ansible loop(with_items), so we denormalise/ flatten it into a new one-dimensional list, which has each volume, as well as all the parent host information. + # auto_volumes are normally a list of volumes per host. We cannot iterate this within a non-nested ansible loop(with_items), so we denormalise/ flatten it into a new one-dimensional list, of each volume, as well as all the parent host information. 
cluster_hosts_target_denormalised_by_volume: | {% set res = [] -%} {%- for cht_host in cluster_hosts_target -%} diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml index bcc20497..a1f88ea4 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml @@ -2,15 +2,19 @@ - debug: msg="by_hosttype_by_host | Attempting to redeploy {{host_to_redeploy.hostname}}" -- name: by_hosttype_by_host | run predeleterole role - include_role: - name: "{{predeleterole}}" - when: predeleterole is defined and predeleterole != "" - -- name: by_hosttype_by_host | Power off old VM - include_role: - name: clusterverse/redeploy/__common - tasks_from: poweroff_vms.yml +- name: stop/ remove previous instance + block: + - name: by_hosttype_by_host | run predeleterole role + include_role: + name: "{{predeleterole}}" + vars: + hosts_to_remove: "{{ hosts_to_stop }}" + when: predeleterole is defined and predeleterole != "" + + - name: by_hosttype_by_host | Power off old VM + include_role: + name: clusterverse/redeploy/__common + tasks_from: poweroff_vms.yml vars: _host_to_redeploy_nosuffix: "{{host_to_redeploy.hostname | regex_replace('-(?!.*-).*')}}" #Remove the cluster_suffix from the hostname hosts_to_stop: "{{ cluster_hosts_state | to_json | from_json | json_query(\"[?tagslabels.lifecycle_state!='current' && starts_with(name, '\" + _host_to_redeploy_nosuffix + \"')]\") }}" diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml index 328599e7..65fb69d7 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml @@ -1,34 +1,7 @@ --- -- name: Preflight check - block: - - block: - - name: Preflight check | ec2_instance_info - ec2_instance_info: - filters: { "instance-state-name": [ "running", "stopped" ], "tag:cluster_name": "{{cluster_name}}", "tag:lifecycle_state": "current" } - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{cluster_vars.region}}" - register: r__ec2_instance_info - - - assert: { that: "_invalid_disks | length == 0", msg: "EBS disks with a device_name of /dev/sd[b-e] cannot be reattached to a new instance (an AWS limitation) [found on: {{ _invalid_disks | join(',')}}]. To replace these, you must use a redeploy scheme that copies the disks." } - vars: { _invalid_disks: "{{ r__ec2_instance_info.instances | json_query(\"[?block_device_mappings[?contains(`/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde`, device_name)]].tags.Name\") }}" } - when: cluster_vars.type == "aws" - - - assert: { that: "_scheme_rmvm_keepdisk_only__copy_or_move is defined and _scheme_rmvm_keepdisk_only__copy_or_move in ['copy', 'move']", msg: "ERROR - _scheme_rmvm_keepdisk_only__copy_or_move must be defined and set to either 'copy' or 'move'" } - when: cluster_vars.type == "esxifree" - - - assert: { that: "non_current_hosts | length == 0", msg: "ERROR - All VMs must be in the 'current' lifecycle_state. 
Those not [{{non_current_hosts | join(',')}}]" } - vars: - non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" - when: (canary=="start" or canary=="none") and (ignore_lifecycle_check is not defined or ignore_lifecycle_check==false) - - - assert: - that: "{{chs_hosts | difference(cht_hosts) | length==0}}" - msg: "Cannot use this scheme to redeploy to smaller cluster; [{{ chs_hosts | join(',') }}] > [{{ cht_hosts | join(',') }}]" - vars: - cht_hosts: "{{ cluster_hosts_target | json_query(\"[].hostname\") | map('regex_replace', '-(?!.*-).*') | list }}" - chs_hosts: "{{ cluster_hosts_state | json_query(\"[].name\") | map('regex_replace', '-(?!.*-).*') | list }}" +- name: Include preflight checks/ assertions. + include_tasks: preflight.yml - name: Redeploy by hosttype; rollback on fail block: diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml new file mode 100644 index 00000000..600f955d --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml @@ -0,0 +1,40 @@ +--- + +- name: Preflight check + block: + - block: + - name: Preflight check | get ec2_instance_info for current disk information + ec2_instance_info: + filters: { "instance-state-name": [ "running", "stopped" ], "tag:cluster_name": "{{cluster_name}}", "tag:lifecycle_state": "current" } + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{cluster_vars.region}}" + register: r__ec2_instance_info + + - assert: { that: "_invalid_disks | length == 0", fail_msg: "EBS disks with a device_name of /dev/sd[b-e] cannot be reattached to a new instance (an AWS limitation) [found on: {{ _invalid_disks | join(',')}}]. To replace these, you must use a redeploy scheme that copies the disks." 
} + vars: { _invalid_disks: "{{ r__ec2_instance_info.instances | json_query(\"[?block_device_mappings[?contains(`/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde`, device_name)]].tags.Name\") }}" } + + - assert: { that: "ec2_disks_is_subset_of_target_disks|bool==false", fail_msg: "Existing EBS disks must have be same as (or a subset of) the auto_volumes defined in your cluster_vars definition.", success_msg: "Success - Existing EBS disks are the same as (or a subset of) the auto_volumes defined in your cluster_vars definition" } + vars: + ec2_disks_is_subset_of_target_disks: |- + {%- set testloop = namespace(is_not_subset=false) -%} + {%- for cht_host in cluster_hosts_target | json_query('[].{hostname: hostname, discs: auto_volumes[].device_name}') -%} + {%- for ec2_host in r__ec2_instance_info.instances | json_query('[?tags.lifecycle_state != "current"].{hostname: tags.Name, discs: block_device_mappings[].device_name | [1:]}') -%} + {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == ec2_host.hostname | regex_replace('-(?!.*-).*') -%} + {%- if not ec2_host.discs is subset(cht_host.discs) -%} + {%- set testloop.is_not_subset = true -%} + {%- endif -%} + {%- endif -%} + {%- endfor -%} + {%- endfor -%} + {{ testloop.is_not_subset }} + when: cluster_vars.type == "aws" + + - assert: { that: "_scheme_rmvm_keepdisk_only__copy_or_move is defined and _scheme_rmvm_keepdisk_only__copy_or_move in ['copy', 'move']", fail_msg: "ERROR - _scheme_rmvm_keepdisk_only__copy_or_move must be defined and set to either 'copy' or 'move'" } + when: cluster_vars.type == "esxifree" + + - assert: { that: "non_current_hosts | length == 0", fail_msg: "ERROR - All VMs must be in the 'current' lifecycle_state. Those not [{{non_current_hosts | join(',')}}]" } + vars: { non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" } + when: (canary=="start" or canary=="none") and (ignore_lifecycle_check is not defined or ignore_lifecycle_check==false) + + - assert: { that: "(cluster_hosts_state | selectattr('tagslabels.lifecycle_state', '==', 'current') | list | length) == (cluster_hosts_target | length)", fail_msg: "Cannot use this scheme to redeploy to a different-sized cluster" } From 86b15a99fe770737c4bc8131be16e3a53c391cba Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Fri, 2 Oct 2020 10:35:28 +0100 Subject: [PATCH 31/58] Prototype ansible_vault.py plugin --- _dependencies/action_plugins/ansible_vault.py | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 _dependencies/action_plugins/ansible_vault.py diff --git a/_dependencies/action_plugins/ansible_vault.py b/_dependencies/action_plugins/ansible_vault.py new file mode 100644 index 00000000..54d61077 --- /dev/null +++ b/_dependencies/action_plugins/ansible_vault.py @@ -0,0 +1,49 @@ +from __future__ import (absolute_import, division, print_function) + +__metaclass__ = type + +from ansible.plugins.action import ActionBase +from ansible.parsing.vault import VaultLib, VaultSecret +import re + + +class ActionModule(ActionBase): + TRANSFERS_FILES = False + + def run(self, tmp=None, task_vars=None): + if task_vars is None: + task_vars = dict() + + if 'vaultid' not in self._task.args or 'vaultpass' not in self._task.args or 'action' not in self._task.args: + return {"failed": True, "msg": "'vaultid' and 'vaultpass' and 'action' are required options"} + + result = super(ActionModule, self).run(tmp, task_vars) + del tmp # tmp is deprecated + + if self._task.args["action"] == "encrypt": + if "plaintext" not 
in self._task.args: + return {"failed": True, "msg": "'plaintext' is required for encrypt"} + + # encrypt: + oVaultSecret = VaultSecret(self._task.args["vaultpass"].encode('utf-8')) + oVaultLib = VaultLib([(self._task.args["vaultid"], oVaultSecret)]) + vault_tag = oVaultLib.encrypt(self._task.args["plaintext"], oVaultSecret, self._task.args["vaultid"]) + + # reformat output + g_tag_value = re.match(r"^(?P
\$ANSIBLE_VAULT;(?P[\d\.]+?);(?P\w+?)(?:;(?P.*?))?)[\r\n](?P.*)$", vault_tag, flags=re.DOTALL) + res_cipherstr = re.sub(r'[ \n\r]', "", g_tag_value.group('vaulttext_raw'), flags=re.DOTALL) + res_vaulttext = g_tag_value.group('header') + "\n" + res_cipherstr + + result['msg'] = {"res_vaulttext": res_vaulttext, "plaintext": self._task.args["plaintext"]} + + else: + if "vaulttext" not in self._task.args: + return {"failed": True, "msg": "'vaulttext' is required for decrypt"} + + oVaultLib = VaultLib([(self._task.args["vaultid"], VaultSecret(self._task.args["vaultpass"].encode('utf-8')))]) + plaintext = oVaultLib.decrypt(self._task.args["vaulttext"]) + result['msg'] = {"res_vaulttext": self._task.args["vaulttext"], "plaintext": plaintext} + + result['failed'] = False + + return result From caef919a8f5860a658e5c4a47417455451e4de3b Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Mon, 5 Oct 2020 09:43:33 +0100 Subject: [PATCH 32/58] Fixes for _scheme_rmvm_keepdisk_rollback --- EXAMPLE/group_vars/_skel/cluster_vars.yml | 15 +- .../group_vars/test_aws_euw1/cluster_vars.yml | 14 +- .../group_vars/test_gcp_euw1/cluster_vars.yml | 2 + README.md | 22 +- _dependencies/library/ebsmap.py | 270 ++++++++++++++++++ _dependencies/library/ebsmap__LICENSE | 29 ++ _dependencies/library/ebsmap__README.md | 24 ++ _dependencies/tasks/main.yml | 2 +- config/tasks/disks_auto_aws.yml | 109 +++++++ config/tasks/disks_auto_aws_nvme.yml | 112 -------- ...{disks_auto.yml => disks_auto_generic.yml} | 19 +- config/tasks/main.yml | 10 +- create/tasks/aws.yml | 12 +- .../tasks/main.yml | 2 +- .../tasks/main.yml | 2 +- ..._diskinfo_to_cluster_hosts_target__aws.yml | 6 +- .../tasks/preflight.yml | 8 +- 17 files changed, 499 insertions(+), 159 deletions(-) create mode 100644 _dependencies/library/ebsmap.py create mode 100644 _dependencies/library/ebsmap__LICENSE create mode 100644 _dependencies/library/ebsmap__README.md create mode 100644 config/tasks/disks_auto_aws.yml delete mode 100644 config/tasks/disks_auto_aws_nvme.yml rename config/tasks/{disks_auto.yml => disks_auto_generic.yml} (79%) diff --git a/EXAMPLE/group_vars/_skel/cluster_vars.yml b/EXAMPLE/group_vars/_skel/cluster_vars.yml index aab028aa..4f004bd4 100644 --- a/EXAMPLE/group_vars/_skel/cluster_vars.yml +++ b/EXAMPLE/group_vars/_skel/cluster_vars.yml @@ -72,13 +72,14 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within # sandbox: # hosttype_vars: # sys: {vms_by_az: {a: 1, b: 1, c: 1}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} -# # sysnobeats: {vms_by_az: {a: 1, b: 0, c: 0}, skip_beat_install:true, flavor: t3a.nano, version: "{{sysnobeats_version | default('')}}", auto_volumes: [] -# # sysdisks: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/var/log/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 3, ephemeral: False, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/var/log/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} -# # sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, auto_volumes: [{"snapshot_tags": 
{"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true }]} -# # hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, auto_volumes: [], nvme: {volumes: [{mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}, {mountpoint: "/var/log/mysvc2", fstype: ext4, volume_size: 2500}]} } -# # hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, auto_volumes: [], nvme: {volumes: [{mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}, {mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } } -# # hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} -# # hosthdd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: h1.2xlarge, auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} +# #sysdisks2: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} +# #sysdisks3: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} +# #sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} +# #hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}] } } +# #hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: 
ephemeral0}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# #hosthdd_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } +# #hosthdd_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# #hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, ephemeral: 0, encrypted: True, "delete_on_termination": true}]} # aws_access_key: "" # aws_secret_key: "" # vpc_name: "test{{buildenv}}" diff --git a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml index d3141a27..810f4be2 100644 --- a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml @@ -79,12 +79,14 @@ cluster_vars: sandbox: hosttype_vars: sys: {vms_by_az: {a: 1, b: 1, c: 1}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} -# sysdisks: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/var/log/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 3, ephemeral: False, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/var/log/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} -# sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true }]} -# hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, auto_volumes: [], nvme: {volumes: [{mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}, {mountpoint: "/var/log/mysvc2", fstype: ext4, volume_size: 2500}]} } -# hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, auto_volumes: [], nvme: {volumes: [{mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}, {mountpoint: "/var/log/mysvc", fstype: ext4, volume_size: 2500}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } } -# hostssd: {vms_by_az: 
{a: 1, b: 0, c: 0}, flavor: c3.large, auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} -# hosthdd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: h1.2xlarge, auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/var/log/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 2, ephemeral: False, encrypted: True, "delete_on_termination": true}]} +# sysdisks2: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} +# sysdisks3: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} +# sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} +# hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}] } } +# hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# hosthdd_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } +# hosthdd_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: 
ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, ephemeral: 0, encrypted: True, "delete_on_termination": true}]} aws_access_key: "" aws_secret_key: "" vpc_name: "test{{buildenv}}" diff --git a/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml index 6ee9dcd1..e4390373 100644 --- a/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml @@ -4,6 +4,8 @@ gcp_credentials_file: "{{ lookup('env','GCP_CREDENTIALS') | default('/dev/null', true) }}" gcp_credentials_json: "{{ lookup('file', gcp_credentials_file) | default({'project_id': 'GCP_CREDENTIALS__NOT_SET','client_email': 'GCP_CREDENTIALS__NOT_SET'}, true) }}" +redeploy_schemes_supported: ['_scheme_addallnew_rmdisk_rollback', '_scheme_addnewvm_rmdisk_rollback', '_scheme_rmvm_rmdisk_only'] + #redeploy_scheme: _scheme_addallnew_rmdisk_rollback #redeploy_scheme: _scheme_addnewvm_rmdisk_rollback #redeploy_scheme: _scheme_rmvm_rmdisk_only diff --git a/README.md b/README.md index 33b6c03e..d963d3d9 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ A full-lifecycle, immutable cloud infrastructure cluster management **role**, us + **Scale (e.g. add a node):** If you change the config yaml and rerun the deploy, new nodes will be added. + **Redeploy (e.g. up-version):** If you need to up-version, the `redeploy.yml` playbook will replace each node in turn, (with optional callbacks), and rollback if any failures occur. -**clusterverse** is designed to deploy base-vm infrastructure that underpins cluster-based infrastructure, for example, Couchbase, or Cassandra. +**clusterverse** is designed to manage base-vm infrastructure that underpins cluster-based infrastructure, for example, Couchbase, Kafka, Elasticsearch, or Cassandra. ## Contributing Contributions are welcome and encouraged. Please see [CONTRIBUTING.md](https://github.com/sky-uk/clusterverse/blob/master/CONTRIBUTING.md) for details. @@ -35,7 +35,8 @@ To active the pipenv: ### DNS DNS is optional. If unset, no DNS names will be created. If required, you will need a DNS zone delegated to one of the following: + Bind9 -+ Route53 ++ AWS Route53 ++ Google Cloud DNS Credentials to the DNS server will also be required. These are specified in the `cluster_vars.yml` file described below. @@ -70,13 +71,15 @@ Credentials can be encrypted inline in the playbooks using [ansible-vault](https ## Usage **clusterverse** is an Ansible _role_, and as such must be imported into your \/roles directory. There is a full-featured example in the [/EXAMPLE](https://github.com/sky-uk/clusterverse/tree/master/EXAMPLE) subdirectory. 
-To import the role into your project, create a `requirements.yml` file containing: +To import the role into your project, create a [`requirements.yml`](https://github.com/sky-uk/clusterverse/blob/master/EXAMPLE/requirements.yml) file containing: ``` - src: https://github.com/sky-uk/clusterverse - version: master ## or hash, or version + version: master ## branch, hash, or tag name: clusterverse ``` -To install the role into a project's `roles` directory: +If you use a `cluster.yml` file similar to the example found in [EXAMPLE/cluster.yml](https://github.com/sky-uk/clusterverse/blob/master/EXAMPLE/cluster.yml), clusterverse will be installed automatically on each run of the playbook. + +To install it manually: + `ansible-galaxy install -r requirements.yml -p //roles/` @@ -110,7 +113,7 @@ The role is designed to run in two modes: + **It assumes a resilient deployment (it can tolerate one node being deleted from the cluster). There is no rollback in case of failure** + For each node in the cluster: + Run `predeleterole` - + Delete the node + + Delete/ terminate the node (note, this is _irreversible_). + Run the main cluster.yml (with the same parameters as for the main playbook), which forces the missing node to be redeployed (the `cluster_suffix` remains the same). + If the process fails at any point: + No further VMs will be deleted or rebuilt - the playbook stops. @@ -130,8 +133,9 @@ The role is designed to run in two modes: + The old VMs are stopped. + If the process fails for any reason, the old VMs are reinstated, and the new VMs stopped (rollback) + To delete the old VMs, either set '-e canary_tidy_on_success=true', or call redeploy.yml with '-e canary=tidy' - + **_scheme_rmvm_keepdisk_rollback** - + _Cluster topology must remain identical_ + + **_scheme_rmvm_keepdisk_rollback (AWS only so far)** + + Redeploys the nodes one by one, and moves the secondary (non-root) disks from the old to the new (note, only non-ephemeral disks can be moved). + + _Cluster topology must remain identical. More disks may be added, but none may change or be removed._ + **It assumes a resilient deployment (it can tolerate one node being removed from the cluster).** + For each node in the cluster: + Run `predeleterole` @@ -139,5 +143,5 @@ The role is designed to run in two modes: + Detach the disks from the old node + Run the main cluster.yml to create a new node + Attach disks to new node - + If the process fails for any reason, the old VMs are reinstated (and the disks reattached to the old nodes), and the new VMs stopped (rollback) + + If the process fails for any reason, the old VMs are reinstated (and the disks reattached to the old nodes), and the new VMs are stopped (rollback) + To delete the old VMs, either set '-e canary_tidy_on_success=true', or call redeploy.yml with '-e canary=tidy' diff --git a/_dependencies/library/ebsmap.py b/_dependencies/library/ebsmap.py new file mode 100644 index 00000000..dbd6d717 --- /dev/null +++ b/_dependencies/library/ebsmap.py @@ -0,0 +1,270 @@ +# Copyright 2020 Dougal Seeley +# https://github.com/dseeley/ebsmap + +# Copyright 2017 Amazon.com, Inc. and its affiliates. All Rights Reserved. +# Licensed under the MIT License. See the LICENSE accompanying this file +# for the specific language governing permissions and limitations under +# the License. 
+ +from __future__ import (absolute_import, division, print_function) + +__metaclass__ = type + +DOCUMENTATION = ''' +--- +module: ebsmap +version_added: 1.0.0 +short_description: ebsmap +description: + - Map the EBS device name as defined in AWS (e.g. /dev/sdf) with the volume provided to the OS +author: + - Dougal Seeley + - Amazon.com inc. +''' + +EXAMPLES = ''' +- name: Get the nvme map information + ebsmap: + become: yes + register: r__ebsmap + +- name: ebsmap + debug: msg={{ebsmap}} +''' + +RETURN = ''' +"device_map": [ + { + "FSTYPE": "ext4", + "MOUNTPOINT": "/media/mysvc", + "NAME": "nvme1n1", + "PARTLABEL": "", + "SERIAL": "vol0c2c47ee4516063e9", + "TYPE": "disk", + "UUID": "c3630dbe-042e-44e5-ac67-54fa1c9e4cd2", + "device_name_aws": "/dev/sdf", + "device_name_os": "/dev/nvme1n1", + "volume_id": "vol-0c2c47ee4516063e9" + }, + { + "FSTYPE": "", + "MOUNTPOINT": "", + "NAME": "nvme0n1", + "PARTLABEL": "", + "SERIAL": "vol0b05e48d5677db81a", + "TYPE": "disk", + "UUID": "", + "device_name_aws": "/dev/sda1", + "device_name_os": "/dev/nvme0n1", + "volume_id": "vol-0b05e48d5677db81a" + }, + { + "FSTYPE": "ext4", + "MOUNTPOINT": "/", + "NAME": "nvme0n1p1", + "PARTLABEL": "", + "SERIAL": "", + "TYPE": "part", + "UUID": "96ec7adb-9d94-41c0-96a5-d6992c9d5f20", + "device_name_aws": "/dev/sda1", + "device_name_os": "/dev/nvme0n1p1", + "volume_id": "vol-0b05e48d5677db81a" + } + +"device_map": [ + { + "FSTYPE": "", + "MOUNTPOINT": "", + "NAME": "xvda", + "PARTLABEL": "", + "SERIAL": "", + "TYPE": "disk", + "UUID": "", + "device_name_aws": "/dev/sda", + "device_name_os": "/dev/xvda" + }, + { + "FSTYPE": "ext4", + "MOUNTPOINT": "/", + "NAME": "xvda1", + "PARTLABEL": "", + "SERIAL": "", + "TYPE": "part", + "UUID": "96ec7adb-9d94-41c0-96a5-d6992c9d5f20", + "device_name_aws": "/dev/sda1", + "device_name_os": "/dev/xvda1" + } +''' + +from ctypes import * +from fcntl import ioctl +import subprocess +import sys +import json +import re + +try: + from ansible.module_utils.basic import AnsibleModule + from ansible.errors import AnsibleError + from ansible.utils.display import Display +except: + pass + +NVME_ADMIN_IDENTIFY = 0x06 +NVME_IOCTL_ADMIN_CMD = 0xC0484E41 +AMZN_NVME_VID = 0x1D0F +AMZN_NVME_EBS_MN = "Amazon Elastic Block Store" + + +class nvme_admin_command(Structure): + _pack_ = 1 + _fields_ = [("opcode", c_uint8), # op code + ("flags", c_uint8), # fused operation + ("cid", c_uint16), # command id + ("nsid", c_uint32), # namespace id + ("reserved0", c_uint64), + ("mptr", c_uint64), # metadata pointer + ("addr", c_uint64), # data pointer + ("mlen", c_uint32), # metadata length + ("alen", c_uint32), # data length + ("cdw10", c_uint32), + ("cdw11", c_uint32), + ("cdw12", c_uint32), + ("cdw13", c_uint32), + ("cdw14", c_uint32), + ("cdw15", c_uint32), + ("reserved1", c_uint64)] + + +class nvme_identify_controller_amzn_vs(Structure): + _pack_ = 1 + _fields_ = [("bdev", c_char * 32), # block device name + ("reserved0", c_char * (1024 - 32))] + + +class nvme_identify_controller_psd(Structure): + _pack_ = 1 + _fields_ = [("mp", c_uint16), # maximum power + ("reserved0", c_uint16), + ("enlat", c_uint32), # entry latency + ("exlat", c_uint32), # exit latency + ("rrt", c_uint8), # relative read throughput + ("rrl", c_uint8), # relative read latency + ("rwt", c_uint8), # relative write throughput + ("rwl", c_uint8), # relative write latency + ("reserved1", c_char * 16)] + + +class nvme_identify_controller(Structure): + _pack_ = 1 + _fields_ = [("vid", c_uint16), # PCI Vendor ID + ("ssvid", c_uint16), # PCI 
Subsystem Vendor ID + ("sn", c_char * 20), # Serial Number + ("mn", c_char * 40), # Module Number + ("fr", c_char * 8), # Firmware Revision + ("rab", c_uint8), # Recommend Arbitration Burst + ("ieee", c_uint8 * 3), # IEEE OUI Identifier + ("mic", c_uint8), # Multi-Interface Capabilities + ("mdts", c_uint8), # Maximum Data Transfer Size + ("reserved0", c_uint8 * (256 - 78)), + ("oacs", c_uint16), # Optional Admin Command Support + ("acl", c_uint8), # Abort Command Limit + ("aerl", c_uint8), # Asynchronous Event Request Limit + ("frmw", c_uint8), # Firmware Updates + ("lpa", c_uint8), # Log Page Attributes + ("elpe", c_uint8), # Error Log Page Entries + ("npss", c_uint8), # Number of Power States Support + ("avscc", c_uint8), # Admin Vendor Specific Command Configuration + ("reserved1", c_uint8 * (512 - 265)), + ("sqes", c_uint8), # Submission Queue Entry Size + ("cqes", c_uint8), # Completion Queue Entry Size + ("reserved2", c_uint16), + ("nn", c_uint32), # Number of Namespaces + ("oncs", c_uint16), # Optional NVM Command Support + ("fuses", c_uint16), # Fused Operation Support + ("fna", c_uint8), # Format NVM Attributes + ("vwc", c_uint8), # Volatile Write Cache + ("awun", c_uint16), # Atomic Write Unit Normal + ("awupf", c_uint16), # Atomic Write Unit Power Fail + ("nvscc", c_uint8), # NVM Vendor Specific Command Configuration + ("reserved3", c_uint8 * (704 - 531)), + ("reserved4", c_uint8 * (2048 - 704)), + ("psd", nvme_identify_controller_psd * 32), # Power State Descriptor + ("vs", nvme_identify_controller_amzn_vs)] # Vendor Specific + + +class ebs_nvme_device: + def __init__(self, device): + self.device = device + self.ctrl_identify() + + def _nvme_ioctl(self, id_response, id_len): + admin_cmd = nvme_admin_command(opcode=NVME_ADMIN_IDENTIFY, addr=id_response, alen=id_len, cdw10=1) + with open(self.device, "rt") as nvme: + ioctl(nvme, NVME_IOCTL_ADMIN_CMD, admin_cmd) + + def ctrl_identify(self): + self.id_ctrl = nvme_identify_controller() + self._nvme_ioctl(addressof(self.id_ctrl), sizeof(self.id_ctrl)) + if self.id_ctrl.vid != AMZN_NVME_VID or self.id_ctrl.mn.decode().strip() != AMZN_NVME_EBS_MN: + raise TypeError("[ERROR] Not an EBS device: '{0}'".format(self.device)) + + def get_volume_id(self): + vol = self.id_ctrl.sn.decode() + if vol.startswith("vol") and vol[3] != "-": + vol = "vol-" + vol[3:] + return vol + + def get_block_device(self, stripped=False): + device = self.id_ctrl.vs.bdev.decode() + if stripped and device.startswith("/dev/"): + device = device[5:] + return device + + +def main(): + if not (len(sys.argv) > 1 and sys.argv[1] == "console"): + module = AnsibleModule(argument_spec={}, supports_check_mode=True) + else: + # For testing without Ansible (e.g on Windows) + class cDummyAnsibleModule(): + params = {} + + def exit_json(self, changed, **kwargs): + print(changed, json.dumps(kwargs, sort_keys=True, indent=4, separators=(',', ': '))) + + def warn(self, msg): + print("[WARNING]: " + msg) + + def fail_json(self, msg): + print("Failed: " + msg) + exit(1) + + module = cDummyAnsibleModule() + + # Get all existing block volumes by key=value, then parse this into a dictionary (which excludes non disk and partition block types, e.g. ram, loop). 
+ lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,PARTLABEL,MOUNTPOINT,SERIAL', '-P']).decode().rstrip().split('\n') + os_device_names = [dict((map(lambda x: x.strip("\""), sub.split("="))) for sub in dev.split('\" ') if '=' in sub) for dev in lsblk_devices] + os_device_names = [dev for dev in os_device_names if dev['TYPE'] in ['disk', 'part', 'lvm']] + + for os_device in os_device_names: + os_device_path = "/dev/" + os_device['NAME'] + if os_device['NAME'].startswith("nvme"): + try: + dev = ebs_nvme_device(os_device_path) + except FileNotFoundError as e: + module.fail_json(msg=os_device_path + ": FileNotFoundError" + str(e)) + except OSError as e: + module.warn(u"%s is not an nvme device." % os_device_path) + else: + os_device.update({"device_name_os": os_device_path, "device_name_aws": '/dev/' + dev.get_block_device(stripped=True).rstrip(), "volume_id": dev.get_volume_id()}) + elif os_device['NAME'].startswith("xvd"): + os_device.update({"device_name_os": os_device_path, "device_name_aws": '/dev/' + re.sub(r'xvd(.*)', r'sd\1', os_device['NAME'])}) + else: + os_device.update({"device_name_os": os_device_path, "device_name_aws": ""}) + + module.exit_json(changed=False, device_map=os_device_names) + + +if __name__ == '__main__': + main() diff --git a/_dependencies/library/ebsmap__LICENSE b/_dependencies/library/ebsmap__LICENSE new file mode 100644 index 00000000..3c642ec5 --- /dev/null +++ b/_dependencies/library/ebsmap__LICENSE @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2020, Dougal Seeley +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/_dependencies/library/ebsmap__README.md b/_dependencies/library/ebsmap__README.md new file mode 100644 index 00000000..f38b360c --- /dev/null +++ b/_dependencies/library/ebsmap__README.md @@ -0,0 +1,24 @@ +# ebsmap + +This is an Ansible module that is able to map AWS EBS device names (including NVME devices) to the host device names. + +## Credits +The bulk of the heavy lifting is nvme ioctl commands written by AWS for their Amazon Linux AMIs. 
See: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/nvme-ebs-volumes.html + +## Execution +This can be run as an Ansible module (needs root): +```yaml +- name: Get the nvme map information + ebsmap: + become: yes + register: r__ebsmap + +- name: ebsmap + debug: msg={{ebsmap}} + +``` + +or from the console: +```bash +python3 ./ebsmap.py console +``` \ No newline at end of file diff --git a/_dependencies/tasks/main.yml b/_dependencies/tasks/main.yml index ad6b4d29..2d300f62 100644 --- a/_dependencies/tasks/main.yml +++ b/_dependencies/tasks/main.yml @@ -36,5 +36,5 @@ - assert: { that: "(cluster_vars.assign_public_ip == 'yes' and cluster_vars.inventory_ip == 'public') or (cluster_vars.inventory_ip == 'private')", fail_msg: "If inventory_ip=='public', 'assign_public_ip' must be 'yes'" } when: cluster_vars.type == "gcp" or cluster_vars.type == "aws" - - assert: { that: "cluster_vars[buildenv] | json_query(\"hosttype_vars.*.auto_volumes[] | [?contains(`/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde`, device_name)]\") | length == 0", fail_msg: "/dev/sd[b-e] are not allowed as device_name in AWS cluster_vars[buildenv].hosttype_vars. Please start at /dev/sdf." } + - assert: { that: "cluster_vars[buildenv] | json_query(\"hosttype_vars.*.auto_volumes[] | [?contains(`/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde`, device_name) && volume_type!='ephemeral']\") | length == 0", fail_msg: "device_names /dev/sd[b-e] are only allowed for ephemeral volumes in AWS cluster_vars[buildenv].hosttype_vars. Please start non-ephemeral devices at /dev/sdf." } when: cluster_vars.type == "aws" diff --git a/config/tasks/disks_auto_aws.yml b/config/tasks/disks_auto_aws.yml new file mode 100644 index 00000000..bc2f8859 --- /dev/null +++ b/config/tasks/disks_auto_aws.yml @@ -0,0 +1,109 @@ +--- + +- name: disks_auto_aws | auto_volumes + debug: msg={{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].auto_volumes }} + +- name: disks_auto_aws | cluster_hosts_target(inventory_hostname) + debug: msg={{ (cluster_hosts_target | selectattr('hostname', '==', inventory_hostname) | list | first)['auto_volumes'] }} + + +- name: disks_auto_aws | Mount volumes as individual disks + block: + - name: disks_auto_aws | Get the nvme information (pre-filesystem create) + ebsmap: + become: yes + register: r__ebsmap + + - name: disks_auto_aws | r__ebsmap (pre-filesystem create) + debug: msg={{r__ebsmap}} + + - name: disks_auto_aws | Create filesystem (partitionless) + become: yes + filesystem: + fstype: "{{ item.fstype }}" + dev: "{{ (r__ebsmap.device_map | selectattr('device_name_aws', '==', item.device_name) | list | last)['device_name_os'] }}" + loop: "{{auto_vols}}" + + - name: disks_auto_aws | Get the nvme information (post-filesystem create), to get the block IDs for mounting + ebsmap: + become: yes + register: r__ebsmap + + - name: disks_auto_aws | r__ebsmap (post-filesystem create) + debug: msg={{r__ebsmap}} + + - name: disks_auto_aws | Mount created filesytem(s) persistently + become: yes + mount: + path: "{{ item.mountpoint }}" + src: "UUID={{ (r__ebsmap.device_map | selectattr('device_name_aws', '==', item.device_name) | list | last)['UUID'] }}" + fstype: "{{ item.fstype }}" + state: mounted + opts: _netdev + loop: "{{auto_vols}}" + + - name: disks_auto_aws | change ownership of mountpoint (if set) + become: yes + file: + path: "{{ item.mountpoint }}" + state: directory + mode: "{{ item.perms.mode | default(omit)}}" + owner: "{{ item.perms.owner | default(omit)}}" + group: "{{ item.perms.group | default(omit)}}" + loop: 
"{{auto_vols}}" + when: (auto_vols | map(attribute='mountpoint') | list | unique | count == auto_vols | map(attribute='mountpoint') | list | count) + vars: + auto_vols: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].auto_volumes }}" + + +# The following block mounts all nvme attached volumes that have a single, common mountpoint, by creating a logical volume +- name: disks_auto_aws | Mount nvme volumes in a single mountpoint through LV/VG + block: + - name: disks_auto_aws | Install logical volume management tooling. (yum - RedHat/CentOS) + become: true + yum: + name: "lvm*" + state: present + when: ansible_os_family == 'RedHat' + + - name: disks_auto_aws | Get the nvme information (pre-filesystem create) + ebsmap: + become: yes + register: r__ebsmap + + - name: disks_auto_aws | r__ebsmap (pre-filesystem create) + debug: msg={{r__ebsmap}} + + - name: disks_auto_aws | Create a volume group from all nvme devices + become: yes + lvg: + vg: "{{ hosttype_vars.lvmparams.vg_name }}" + pvs: "{{ r__ebsmap.device_map | json_query(\"[?device_name_aws && contains('\" + auto_vol_device_names + \"', device_name_aws)].device_name_os\") | join(',')}}" + vars: + auto_vol_device_names: "{{hosttype_vars.auto_volumes | map(attribute='device_name') | sort | join(',')}}" + + - name: disks_auto_aws | Create a logical volume from volume group + become: yes + lvol: + vg: "{{ hosttype_vars.lvmparams.vg_name }}" + lv: "{{ hosttype_vars.lvmparams.lv_name }}" + size: "{{ hosttype_vars.lvmparams.lv_size }}" + + - name: disks_auto_aws | Create filesystem(s) on attached nvme volume(s) + become: yes + filesystem: + fstype: "{{ hosttype_vars.auto_volumes | map(attribute='fstype') | list | unique | join('') }}" + dev: "/dev/{{ hosttype_vars.lvmparams.vg_name }}/{{ hosttype_vars.lvmparams.lv_name }}" + force: no + + - name: disks_auto_aws | Mount created filesytem(s) persistently + become: yes + mount: + path: "{{ hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | unique | join('') }}" + src: "/dev/{{ hosttype_vars.lvmparams.vg_name }}/{{ hosttype_vars.lvmparams.lv_name }}" + fstype: "{{ hosttype_vars.auto_volumes | map(attribute='fstype') | list | unique | join('') }}" + state: mounted + opts: _netdev + when: ('lvmparams' in hosttype_vars) and (hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | unique | count == 1) and (hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | count >= 2) and (hosttype_vars.auto_volumes | map(attribute='fstype') | list | unique | count == 1) + vars: + hosttype_vars: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype] }}" diff --git a/config/tasks/disks_auto_aws_nvme.yml b/config/tasks/disks_auto_aws_nvme.yml deleted file mode 100644 index c1623f2a..00000000 --- a/config/tasks/disks_auto_aws_nvme.yml +++ /dev/null @@ -1,112 +0,0 @@ ---- -#- debug: msg={{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme_volumes }} -#- debug: msg={{ ansible_facts.devices }} -- block: - - name: autodisks_nvme | Get unused block devices - set_fact: - block_devices: "{{ {'dev': item, 'size_b': (ansible_facts.devices[item].sectors|int) * (ansible_facts.devices[item].sectorsize|int)} }}" - with_items: "{{ ansible_facts.devices }}" - register: block_devices_list - when: item | regex_search("nvme") and ansible_facts.devices[item].partitions == {} - - - name: autodisks_nvme | Create unused block devices list - set_fact: - lsblk_volumes: "{{ block_devices_list.results | 
map(attribute='ansible_facts.block_devices') | select('defined') | list }}" - - - name: autodisks_nvme | lsblk_volumes - debug: msg={{ lsblk_volumes }} - - - name: autodisks_nvme | Create 'nvmevols' fact that contains a list of available host nvme devices (lsblk) mapped to the mountpoints defined in cluster_vars. Handles single mounting points with LV/VG - set_fact: - nvmevols: | - {% set res = [] -%} - {% set tmp_blkvols = lsblk_volumes -%} - {%- for nvmevol in cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.volumes -%} - {%- set blkvolloop = namespace(break=false) -%} - {%- for blkvol in tmp_blkvols if not blkvolloop.break -%} - {%- if (nvmevol.volume_size*1000000000|int) == (blkvol.size_b|int) -%} - {%- set _ = res.extend([ {'device': '/dev/'+blkvol.dev, 'mountpoint': nvmevol.mountpoint, 'fstype': nvmevol.fstype, 'perms': autovol.perms | default({})}]) -%} - {%- set _ = tmp_blkvols.remove(blkvol) -%} - {%- set blkvolloop.break = true -%} - {%- endif -%} - {%- endfor -%} - {%- endfor -%} - {{ res }} - - - name: autodisks_nvme | nvme mountpoints - debug: msg={{ nvmevols | map(attribute='mountpoint') | list | unique }} - - # The following block mounts all nvme attached volumes that have individual mountpoints - - name: autodisks_nvme | Mount nvme volumes with different mountpoints - block: - - name: autodisks_nvme | Create filesystem(s) on attached nvme volume(s) - become: yes - filesystem: - fstype: "{{ item.fstype }}" - dev: "{{ item.device }}" - force: no - with_items: "{{ nvmevols }}" - - - name: autodisks_nvme | Mount nvme created filesytem(s) persistently - become: yes - mount: - path: "{{ item.mountpoint }}" - src: "{{ item.device }}" - fstype: "{{ item.fstype }}" - state: mounted - opts: _netdev - with_items: "{{ nvmevols }}" - - - name: autodisks_nvme | change ownership of mountpoint (if set) - become: yes - file: - path: "{{ item.mountpoint }}" - state: directory - mode: "{{ item.perms.mode | default(omit)}}" - owner: "{{ item.perms.owner | default(omit)}}" - group: "{{ item.perms.group | default(omit)}}" - with_items: "{{ nvmevols }}" - when: (nvmevols | map(attribute='mountpoint') | list | unique | count == nvmevols | map(attribute='mountpoint') | list | count) - - # The following block mounts all nvme attached volumes that have a single, common mountpoint, by creating a logical volume - - name: autodisks_nvme | Mount nvme volumes in a single mountpoint through LV/VG - block: - #- debug: msg={{nvmevols | map(attribute='device') | join(',')}} - - - name: autodisks_nvme | Install logical volume management tooling. 
(yum - RedHat/CentOS) - become: true - yum: - name: "lvm*" - state: present - when: ansible_os_family == 'RedHat' - - - name: autodisks_nvme | Create a volume group from all nvme devices - become: yes - lvg: - vg: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.vg_name }}" - pvs: "{{nvmevols | map(attribute='device') | join(',')}}" - - - name: autodisks_nvme | Create a logical volume from volume group - become: yes - lvol: - vg: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.vg_name }}" - lv: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.lv_name }}" - size: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.lv_size }}" - - - name: autodisks_nvme | Create filesystem(s) on attached nvme volume(s) - become: yes - filesystem: - fstype: "{{ nvmevols | map(attribute='fstype') | list | unique | join('') }}" - dev: "/dev/{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.vg_name }}/{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.lv_name }}" - force: no - - - name: autodisks_nvme | Mount created filesytem(s) persistently - become: yes - mount: - path: "{{ nvmevols | map(attribute='mountpoint') | list | unique | join('') }}" - src: "/dev/{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.vg_name }}/{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.lvmparams.lv_name }}" - fstype: "{{ nvmevols | map(attribute='fstype') | list | unique | join('') }}" - state: mounted - opts: _netdev - when: (nvmevols | map(attribute='mountpoint') | list | unique | count == 1) and (nvmevols | map(attribute='mountpoint') | list | count >= 2) and (nvmevols | map(attribute='fstype') | list | unique | count == 1) - when: (cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.volumes is defined) and (cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.volumes|length > 0) and (cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].nvme.volumes != "[]") diff --git a/config/tasks/disks_auto.yml b/config/tasks/disks_auto_generic.yml similarity index 79% rename from config/tasks/disks_auto.yml rename to config/tasks/disks_auto_generic.yml index aefd1d5b..cde01eac 100644 --- a/config/tasks/disks_auto.yml +++ b/config/tasks/disks_auto_generic.yml @@ -1,22 +1,24 @@ --- + #- debug: msg={{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].auto_volumes }} #- debug: msg={{ ansible_facts.devices }} + - block: - - name: autodisks | Get unused block devices + - name: disks_auto_generic | Get unused block devices set_fact: block_devices: "{{ {'dev': item, 'size_b': (ansible_facts.devices[item].sectors|int) * (ansible_facts.devices[item].sectorsize|int)} }}" with_items: "{{ ansible_facts.devices }}" register: block_devices_list when: item | regex_search("nvme|[xvsh]+d") and ansible_facts.devices[item].partitions == {} - - name: autodisks | Create unused block devices list + - name: disks_auto_generic | Create unused block devices list set_fact: lsblk_volumes: "{{ block_devices_list.results | map(attribute='ansible_facts.block_devices') | select('defined') | list }}" - - name: autodisks | lsblk_volumes + - name: disks_auto_generic | lsblk_volumes debug: msg={{ lsblk_volumes }} -- name: 
autodisks | Create 'hostvols' fact that contains a list of available host devices (lsblk) mapped to the mountpoints defined in cluster_vars. Allow for multiple disks with same size. +- name: disks_auto_generic | Create 'hostvols' fact that contains a list of available host devices (lsblk) mapped to the mountpoints defined in cluster_vars. Allow for multiple disks with same size. set_fact: hostvols: | {% set res = [] -%} @@ -31,14 +33,13 @@ {%- set blkvolloop.break = true -%} {%- endif -%} {%- endfor -%} - {%- endfor -%} {{ res }} -#- name: autodisks | hostvols +#- name: disks_auto_generic | hostvols # debug: msg={{hostvols}} # Create partition-less filesystems. -- name: autodisks | Create filesystem(s) on attached volume(s) +- name: disks_auto_generic | Create filesystem(s) on attached volume(s) become: yes filesystem: fstype: "{{ item.fstype }}" @@ -50,7 +51,7 @@ delay: 1 until: created_filesystem is not failed -- name: autodisks | Mount created filesytem(s) persistently +- name: disks_auto_generic | Mount created filesytem(s) persistently become: yes mount: path: "{{ item.mountpoint }}" @@ -60,7 +61,7 @@ opts: _netdev with_items: "{{ hostvols }}" -- name: autodisks | change ownership of mountpoint (if set) +- name: disks_auto_generic | change ownership of mountpoint (if set) become: yes file: path: "{{ item.mountpoint }}" diff --git a/config/tasks/main.yml b/config/tasks/main.yml index e842014a..5a23db5e 100644 --- a/config/tasks/main.yml +++ b/config/tasks/main.yml @@ -52,13 +52,13 @@ mode: 0755 when: (static_journal is defined and static_journal|bool) - # Run this *before* the general auto_volumes tasks, because we need them to be eliminated before we try to mount the other disks. -- name: Attach nvme_volumes - include_tasks: disks_auto_aws_nvme.yml +- name: Create partition table, format and attach volumes - AWS + include_tasks: disks_auto_aws.yml when: cluster_vars.type == "aws" -- name: Attach auto_volumes - include_tasks: disks_auto.yml +- name: Create partition table, format and attach volumes - generic + include_tasks: disks_auto_generic.yml + when: cluster_vars.type != "aws" - name: install prometheus node exporter daemon include_tasks: prometheus_node_exporter.yml diff --git a/create/tasks/aws.yml b/create/tasks/aws.yml index 65192a18..4aee2484 100644 --- a/create/tasks/aws.yml +++ b/create/tasks/aws.yml @@ -1,5 +1,8 @@ --- +- name: cluster_hosts_target_denormalised_by_volume + debug: msg="{{cluster_hosts_target_denormalised_by_volume}}" + - name: create/aws | Create AWS security group ec2_group: name: "{{ cluster_name }}-sg" @@ -45,6 +48,7 @@ wait: yes instance_tags: "{{ _instance_tags | combine(cluster_vars.custom_tagslabels | default({})) }}" termination_protection: "{{cluster_vars[buildenv].termination_protection}}" + volumes: "{{ item.auto_volumes | selectattr('src', 'undefined') | list | default([]) }}" count_tag: { Name: "{{item.hostname}}" } exact_count: 1 vars: @@ -79,7 +83,7 @@ set_fact: cluster_hosts_created: "{{ r__async_status__ec2.results | json_query(\"[?changed==`true`].item.item\") }}" - - name: create/aws | Create new volumes asynchronously (or attach existing if src is present, e.g. via the _scheme_rmvm_keepdisk_rollback scheme) + - name: create/aws | Attach (or create) volumes where 'src' is present (e.g. 
inserted as part of _scheme_rmvm_keepdisk_rollback scheme) ec2_vol: aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" @@ -92,7 +96,7 @@ volume_size: "{%- if 'src' not in item.auto_volume -%}{{item.auto_volume.volume_size}}{%- endif -%}" volume_type: "{{item.auto_volume.volume_type}}" delete_on_termination: yes - loop: "{{ cluster_hosts_target_denormalised_by_volume }}" + loop: "{{ cluster_hosts_target_denormalised_by_volume| selectattr('src', 'defined') | list }}" async: 7200 poll: 0 register: r__ec2_vol @@ -105,6 +109,9 @@ retries: 300 with_items: "{{r__ec2_vol.results}}" +# - name: create/aws | r__async_status__ec2_vol +# debug: msg={{r__async_status__ec2_vol}} + - name: create/aws | Tag the EBS volumes block: @@ -138,6 +145,7 @@ {{ res }} _tags: Name: "{{ item.hostname }}--{{item.device_name | regex_replace('^.*\\/(.*)', '\\1')}}" + device_name: "{{item.device_name}}" inv_node_version: "{{cluster_vars[buildenv].hosttype_vars[item.hosttype].version | default(omit)}}" inv_node_type: "{{item.hosttype}}" owner: "{{ lookup('env','USER') | lower }}" diff --git a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml index 29ee5ac6..728a5618 100644 --- a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml +++ b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml @@ -5,7 +5,7 @@ - assert: { that: "non_current_hosts | length == 0", msg: "ERROR - All VMs must be in the 'current' lifecycle_state. Those not [{{non_current_hosts | join(',')}}]" } vars: non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" - when: (canary=="start" or canary=="none") and (ignore_lifecycle_check is not defined or ignore_lifecycle_check==false) + when: canary=="start" or canary=="none" - name: Redeploy by replacing entire cluster; rollback on fail diff --git a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml index 08f668a9..2a71a918 100644 --- a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml +++ b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml @@ -5,7 +5,7 @@ - assert: { that: "non_current_hosts | length == 0", msg: "ERROR - All VMs must be in the 'current' lifecycle_state. 
Those not [{{non_current_hosts | join(',')}}]" } vars: non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" - when: (canary=="start" or canary=="none") and (ignore_lifecycle_check is not defined or ignore_lifecycle_check==false) + when: canary=="start" or canary=="none" - name: Redeploy by hosttype; rollback on fail diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml index 20bde44d..07db0737 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml @@ -10,8 +10,8 @@ region: "{{cluster_vars.region}}" register: r__ec2_instance_info -#- name: _get_diskinfo_aws | r__ec2_instance_info -# debug: msg={{r__ec2_instance_info}} +- name: _get_diskinfo_aws | r__ec2_instance_info + debug: msg={{r__ec2_instance_info}} - name: _get_diskinfo_aws | augment cluster_hosts_target auto_volumes with source disk info set_fact: @@ -21,7 +21,7 @@ {%- for chs_host_info_result in r__ec2_instance_info.instances | selectattr('tags.lifecycle_state', '!=', 'current') -%} {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == chs_host_info_result.tags.Name | regex_replace('-(?!.*-).*') -%} {%- for chs_host_diskinfo in chs_host_info_result.block_device_mappings | selectattr('device_name', '==', cht_autovol.device_name) -%} - {%- set _ = cht_autovol.update({'src': {'instance_id': chs_host_info_result.instance_id, 'volume_id': chs_host_diskinfo.ebs.volume_id }}) -%} + {%- set _ = cht_autovol.update({'src': {'instance_id': chs_host_info_result.instance_id, 'device_name': chs_host_diskinfo.device_name, 'volume_id': chs_host_diskinfo.ebs.volume_id }}) -%} {%- endfor -%} {%- endif -%} {%- endfor -%} diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml index 600f955d..f04a2b13 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml @@ -3,6 +3,8 @@ - name: Preflight check block: - block: + - assert: { that: "cluster_vars.type != 'gcp'", fail_msg: "This scheme is not supported on GCP." } + - name: Preflight check | get ec2_instance_info for current disk information ec2_instance_info: filters: { "instance-state-name": [ "running", "stopped" ], "tag:cluster_name": "{{cluster_name}}", "tag:lifecycle_state": "current" } @@ -11,10 +13,10 @@ region: "{{cluster_vars.region}}" register: r__ec2_instance_info - - assert: { that: "_invalid_disks | length == 0", fail_msg: "EBS disks with a device_name of /dev/sd[b-e] cannot be reattached to a new instance (an AWS limitation) [found on: {{ _invalid_disks | join(',')}}]. To replace these, you must use a redeploy scheme that copies the disks." } + - assert: { that: "_invalid_disks | length == 0", fail_msg: "Disks cannot be attached to /dev/sd[b-e] after the instance has been created (these are supposed to be ephemeral mounts only, so can only exist if created with the VM). [Found on: {{ _invalid_disks | join(',')}}]. 
If you have EBS disks, you'll need to move them to another mount point (a redeploy scheme that replaces the disks will do this" } vars: { _invalid_disks: "{{ r__ec2_instance_info.instances | json_query(\"[?block_device_mappings[?contains(`/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde`, device_name)]].tags.Name\") }}" } - - assert: { that: "ec2_disks_is_subset_of_target_disks|bool==false", fail_msg: "Existing EBS disks must have be same as (or a subset of) the auto_volumes defined in your cluster_vars definition.", success_msg: "Success - Existing EBS disks are the same as (or a subset of) the auto_volumes defined in your cluster_vars definition" } + - assert: { that: "ec2_disks_is_subset_of_target_disks|bool==false", fail_msg: "Existing EBS disks must be the same as (or a subset of) the auto_volumes defined in your cluster_vars definition.", success_msg: "Success - Existing EBS disks are the same as (or a subset of) the auto_volumes defined in your cluster_vars definition" } vars: ec2_disks_is_subset_of_target_disks: |- {%- set testloop = namespace(is_not_subset=false) -%} @@ -35,6 +37,6 @@ - assert: { that: "non_current_hosts | length == 0", fail_msg: "ERROR - All VMs must be in the 'current' lifecycle_state. Those not [{{non_current_hosts | join(',')}}]" } vars: { non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" } - when: (canary=="start" or canary=="none") and (ignore_lifecycle_check is not defined or ignore_lifecycle_check==false) + when: (canary=="start" or canary=="none") - assert: { that: "(cluster_hosts_state | selectattr('tagslabels.lifecycle_state', '==', 'current') | list | length) == (cluster_hosts_target | length)", fail_msg: "Cannot use this scheme to redeploy to a different-sized cluster" } From 69a69ac4ba6b4ed029146a8994d0024f514a2f50 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Mon, 5 Oct 2020 17:25:04 +0100 Subject: [PATCH 33/58] Update ebsmap.py to support NVME instance stores --- EXAMPLE/group_vars/_skel/cluster_vars.yml | 13 +++++---- .../group_vars/test_aws_euw1/cluster_vars.yml | 11 ++++---- _dependencies/library/ebsmap.py | 27 +++++++++++++++++-- _dependencies/library/ebsmap__LICENSE | 24 +++++++++++++++++ 4 files changed, 60 insertions(+), 15 deletions(-) diff --git a/EXAMPLE/group_vars/_skel/cluster_vars.yml b/EXAMPLE/group_vars/_skel/cluster_vars.yml index 4f004bd4..5a1c2b0a 100644 --- a/EXAMPLE/group_vars/_skel/cluster_vars.yml +++ b/EXAMPLE/group_vars/_skel/cluster_vars.yml @@ -71,15 +71,14 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within # rule_desc: "Access from all VMs attached to the {{ cluster_name }}-sg group" # sandbox: # hosttype_vars: -# sys: {vms_by_az: {a: 1, b: 1, c: 1}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} +# sys: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} # #sysdisks2: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} # #sysdisks3: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", 
auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} -# #sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} -# #hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}] } } -# #hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } -# #hosthdd_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } -# #hosthdd_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } -# #hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, ephemeral: 0, encrypted: True, "delete_on_termination": true}]} +# #sysdisks-snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/media/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} +# #hostnvme-multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: 
[{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {"device_name": "/dev/sdf", mountpoint: "/media/mysvc8", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }] } +# #hostnvme-lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# #hosthdd-multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } +# #hosthdd-lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } # aws_access_key: "" # aws_secret_key: "" # vpc_name: "test{{buildenv}}" diff --git a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml index 810f4be2..e063d8c8 100644 --- a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml @@ -81,12 +81,11 @@ cluster_vars: sys: {vms_by_az: {a: 1, b: 1, c: 1}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} # sysdisks2: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} # sysdisks3: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} -# 
sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} -# hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}] } } -# hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } -# hosthdd_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } -# hosthdd_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } -# hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, ephemeral: 0, encrypted: True, "delete_on_termination": true}]} +# sysdisks-snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/media/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} +# hostnvme-multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {"device_name": "/dev/sdf", mountpoint: "/media/mysvc8", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }] } +# hostnvme-lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", 
mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# hosthdd-multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } +# hosthdd-lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } aws_access_key: "" aws_secret_key: "" vpc_name: "test{{buildenv}}" diff --git a/_dependencies/library/ebsmap.py b/_dependencies/library/ebsmap.py index dbd6d717..0cb47d6a 100644 --- a/_dependencies/library/ebsmap.py +++ b/_dependencies/library/ebsmap.py @@ -110,6 +110,11 @@ except: pass +try: + from urllib.request import urlopen +except ImportError: + from urllib2 import urlopen + NVME_ADMIN_IDENTIFY = 0x06 NVME_IOCTL_ADMIN_CMD = 0xC0484E41 AMZN_NVME_VID = 0x1D0F @@ -243,10 +248,22 @@ def fail_json(self, msg): module = cDummyAnsibleModule() # Get all existing block volumes by key=value, then parse this into a dictionary (which excludes non disk and partition block types, e.g. ram, loop). - lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,PARTLABEL,MOUNTPOINT,SERIAL', '-P']).decode().rstrip().split('\n') + lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,MOUNTPOINT,MODEL,SERIAL', '-P']).decode().rstrip().split('\n') os_device_names = [dict((map(lambda x: x.strip("\""), sub.split("="))) for sub in dev.split('\" ') if '=' in sub) for dev in lsblk_devices] os_device_names = [dev for dev in os_device_names if dev['TYPE'] in ['disk', 'part', 'lvm']] - + os_device_names.sort(key=lambda k: k['NAME']) + + # Instance stores (AKA ephemeral volumes) do not appear to have a defined endpoint that maps between the /dev/sd[b-e] defined in the instance creation map, and the OS /dev/nvme[0-26]n1 device. + # For this scenario, we can only return the instance stores in the order that they are defined. Because instance stores do not survive a poweroff and cannot be detached and reattached, the order doesn't matter as much. 
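+    # Illustrative sketch only (assumed example values, not taken from this patch): on an instance type with two
+    # instance stores (e.g. i3en.2xlarge, as used in the hostnvme-* examples in cluster_vars), the metadata calls
+    # below would typically return something like:
+    #   GET http://169.254.169.254/latest/meta-data/block-device-mapping/            -> "ami\nephemeral0\nephemeral1\nroot"
+    #   GET http://169.254.169.254/latest/meta-data/block-device-mapping/ephemeral0  -> "sdb"
+    #   GET http://169.254.169.254/latest/meta-data/block-device-mapping/ephemeral1  -> "sdc"
+    # yielding instance_store_map = [{'ephemeral_id': 'ephemeral0', 'ephemeral_map': 'sdb'},
+    #                                {'ephemeral_id': 'ephemeral1', 'ephemeral_map': 'sdc'}];
+    # these entries are then consumed in order (further below) for NVMe devices that are not EBS volumes.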
+ instance_store_map = [] + with urlopen("http://169.254.169.254/latest/meta-data/block-device-mapping/") as response__block_device_mapping: + block_device_mappings = response__block_device_mapping.read().decode().split("\n") + for block_device_mappings__ephemeral_id in [dev for dev in block_device_mappings if dev.startswith('ephemeral')]: + with urlopen("http://169.254.169.254/latest/meta-data/block-device-mapping/" + block_device_mappings__ephemeral_id) as response__ephemeral_device: + block_device_mappings__ephemeral_mapped = response__ephemeral_device.read().decode() + instance_store_map.append({'ephemeral_id': block_device_mappings__ephemeral_id, 'ephemeral_map': block_device_mappings__ephemeral_mapped}) + + instance_store_count = 0 for os_device in os_device_names: os_device_path = "/dev/" + os_device['NAME'] if os_device['NAME'].startswith("nvme"): @@ -254,6 +271,12 @@ def fail_json(self, msg): dev = ebs_nvme_device(os_device_path) except FileNotFoundError as e: module.fail_json(msg=os_device_path + ": FileNotFoundError" + str(e)) + except TypeError as e: + if instance_store_count < len(instance_store_map): + os_device.update({"device_name_os": os_device_path, "device_name_aws": '/dev/' + instance_store_map[instance_store_count]['ephemeral_map'], "volume_id": dev.get_volume_id()}) + instance_store_count += 1 + else: + module.warn(u"%s is not an EBS device and there is no instance store mapping." % os_device_path) except OSError as e: module.warn(u"%s is not an nvme device." % os_device_path) else: diff --git a/_dependencies/library/ebsmap__LICENSE b/_dependencies/library/ebsmap__LICENSE index 3c642ec5..fb891162 100644 --- a/_dependencies/library/ebsmap__LICENSE +++ b/_dependencies/library/ebsmap__LICENSE @@ -27,3 +27,27 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +--- + +Copyright 2017 Amazon.com, Inc. and its affiliates. All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is furnished +to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + From 7c5a27518310096a9cfe15a14ab736725bfa8033 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Sun, 4 Oct 2020 23:34:52 +0100 Subject: [PATCH 34/58] Create a new redeploy scheme (_scheme_rmvm_keepdisk_rollback), which moves disks between old and new VMs, saving a lot of time. 
+ Replace the disks_auto logic to use the actual mapping of AWS device name to OS device name. This is necessary to support the disk moving scheme, and is much more reliable; include the 'ebsmap' module from https://github.com/dseeley/ebsmap to do this. + Mount disks using UUID instead of device string, as with nvme disks, the device names can change between restarts (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/nvme-ebs-volumes.html#identify-nvme-ebs-device). --- EXAMPLE/group_vars/_skel/cluster_vars.yml | 13 +++--- .../group_vars/test_aws_euw1/cluster_vars.yml | 11 ++--- _dependencies/library/ebsmap.py | 27 +---------- _dependencies/library/ebsmap__LICENSE | 2 +- .../tasks/get_cluster_hosts_state.yml | 45 ------------------- .../tasks/preflight.yml | 3 -- redeploy/tasks/main.yml | 2 +- 7 files changed, 17 insertions(+), 86 deletions(-) diff --git a/EXAMPLE/group_vars/_skel/cluster_vars.yml b/EXAMPLE/group_vars/_skel/cluster_vars.yml index 5a1c2b0a..4f004bd4 100644 --- a/EXAMPLE/group_vars/_skel/cluster_vars.yml +++ b/EXAMPLE/group_vars/_skel/cluster_vars.yml @@ -71,14 +71,15 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within # rule_desc: "Access from all VMs attached to the {{ cluster_name }}-sg group" # sandbox: # hosttype_vars: -# sys: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} +# sys: {vms_by_az: {a: 1, b: 1, c: 1}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} # #sysdisks2: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} # #sysdisks3: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} -# #sysdisks-snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/media/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} -# #hostnvme-multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {"device_name": "/dev/sdf", mountpoint: "/media/mysvc8", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, 
"delete_on_termination": true }] } -# #hostnvme-lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } -# #hosthdd-multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } -# #hosthdd-lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# #sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} +# #hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}] } } +# #hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# #hosthdd_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } +# #hosthdd_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: 
ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# #hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, ephemeral: 0, encrypted: True, "delete_on_termination": true}]} # aws_access_key: "" # aws_secret_key: "" # vpc_name: "test{{buildenv}}" diff --git a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml index e063d8c8..810f4be2 100644 --- a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml @@ -81,11 +81,12 @@ cluster_vars: sys: {vms_by_az: {a: 1, b: 1, c: 1}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} # sysdisks2: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} # sysdisks3: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} -# sysdisks-snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/media/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} -# hostnvme-multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {"device_name": "/dev/sdf", mountpoint: "/media/mysvc8", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }] } -# hostnvme-lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } -# hosthdd-multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: 
"{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } -# hosthdd-lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} +# hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}] } } +# hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# hosthdd_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } +# hosthdd_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, ephemeral: 0, encrypted: True, "delete_on_termination": true}]} aws_access_key: "" aws_secret_key: "" vpc_name: "test{{buildenv}}" diff 
--git a/_dependencies/library/ebsmap.py b/_dependencies/library/ebsmap.py index 0cb47d6a..dbd6d717 100644 --- a/_dependencies/library/ebsmap.py +++ b/_dependencies/library/ebsmap.py @@ -110,11 +110,6 @@ except: pass -try: - from urllib.request import urlopen -except ImportError: - from urllib2 import urlopen - NVME_ADMIN_IDENTIFY = 0x06 NVME_IOCTL_ADMIN_CMD = 0xC0484E41 AMZN_NVME_VID = 0x1D0F @@ -248,22 +243,10 @@ def fail_json(self, msg): module = cDummyAnsibleModule() # Get all existing block volumes by key=value, then parse this into a dictionary (which excludes non disk and partition block types, e.g. ram, loop). - lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,MOUNTPOINT,MODEL,SERIAL', '-P']).decode().rstrip().split('\n') + lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,PARTLABEL,MOUNTPOINT,SERIAL', '-P']).decode().rstrip().split('\n') os_device_names = [dict((map(lambda x: x.strip("\""), sub.split("="))) for sub in dev.split('\" ') if '=' in sub) for dev in lsblk_devices] os_device_names = [dev for dev in os_device_names if dev['TYPE'] in ['disk', 'part', 'lvm']] - os_device_names.sort(key=lambda k: k['NAME']) - - # Instance stores (AKA ephemeral volumes) do not appear to have a defined endpoint that maps between the /dev/sd[b-e] defined in the instance creation map, and the OS /dev/nvme[0-26]n1 device. - # For this scenario, we can only return the instance stores in the order that they are defined. Because instance stores do not survive a poweroff and cannot be detached and reattached, the order doesn't matter as much. - instance_store_map = [] - with urlopen("http://169.254.169.254/latest/meta-data/block-device-mapping/") as response__block_device_mapping: - block_device_mappings = response__block_device_mapping.read().decode().split("\n") - for block_device_mappings__ephemeral_id in [dev for dev in block_device_mappings if dev.startswith('ephemeral')]: - with urlopen("http://169.254.169.254/latest/meta-data/block-device-mapping/" + block_device_mappings__ephemeral_id) as response__ephemeral_device: - block_device_mappings__ephemeral_mapped = response__ephemeral_device.read().decode() - instance_store_map.append({'ephemeral_id': block_device_mappings__ephemeral_id, 'ephemeral_map': block_device_mappings__ephemeral_mapped}) - - instance_store_count = 0 + for os_device in os_device_names: os_device_path = "/dev/" + os_device['NAME'] if os_device['NAME'].startswith("nvme"): @@ -271,12 +254,6 @@ def fail_json(self, msg): dev = ebs_nvme_device(os_device_path) except FileNotFoundError as e: module.fail_json(msg=os_device_path + ": FileNotFoundError" + str(e)) - except TypeError as e: - if instance_store_count < len(instance_store_map): - os_device.update({"device_name_os": os_device_path, "device_name_aws": '/dev/' + instance_store_map[instance_store_count]['ephemeral_map'], "volume_id": dev.get_volume_id()}) - instance_store_count += 1 - else: - module.warn(u"%s is not an EBS device and there is no instance store mapping." % os_device_path) except OSError as e: module.warn(u"%s is not an nvme device." 
% os_device_path) else: diff --git a/_dependencies/library/ebsmap__LICENSE b/_dependencies/library/ebsmap__LICENSE index fb891162..55138771 100644 --- a/_dependencies/library/ebsmap__LICENSE +++ b/_dependencies/library/ebsmap__LICENSE @@ -27,7 +27,7 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - + --- diff --git a/cluster_hosts/tasks/get_cluster_hosts_state.yml b/cluster_hosts/tasks/get_cluster_hosts_state.yml index 7d50dafa..ff0fca7e 100644 --- a/cluster_hosts/tasks/get_cluster_hosts_state.yml +++ b/cluster_hosts/tasks/get_cluster_hosts_state.yml @@ -47,51 +47,6 @@ _cluster_hosts_state__urlregion: "{{r__gcp_compute_instance_info.results | json_query(\"[?resources[?labels]].resources[].{name: name, regionzone: zone, tagslabels: labels, instance_id: id, instance_state: status}\") }}" when: cluster_vars.type == "gcp" -- name: get_cluster_hosts_state_esxifree | Get VMware cluster_hosts_state - block: - - name: get_cluster_hosts_state_esxifree | Get existing VMware instance info - vmware_vm_info: - username: "{{ cluster_vars.username }}" - password: "{{ cluster_vars.password }}" - hostname: "{{ cluster_vars.esxi_ip }}" - validate_certs: no - register: r__vmware_vm_info - delegate_to: localhost - run_once: true - - - name: get_cluster_hosts_state_esxifree | Get existing VMware instance facts - vmware_guest_info: - username: "{{ cluster_vars.username }}" - password: "{{ cluster_vars.password }}" - hostname: "{{ cluster_vars.esxi_ip }}" - validate_certs: no - datacenter: None - uuid: "{{item.uuid}}" - with_items: "{{ r__vmware_vm_info.virtual_machines | to_json | from_json | json_query(\"[?starts_with(guest_name, '\"+cluster_name+\"')]\") }}" - register: r__vmware_guest_info - delegate_to: localhost - run_once: true - - ## esxifree hosts must use the esxi 'annotations' field as json. They are stored as unconventional text in the vmx file, so must - ## be converted into inline-json within the facts. If the annotation field is not convertible to json, then we don't consider this VM part of the cluster. 
- - name: get_cluster_hosts_state_esxifree | update r__vmware_guest_info result with json-parsed annotations - set_fact: - r__vmware_guest_info: | - {% set res = {'results': []} -%} - {%- for result in r__vmware_guest_info.results -%} - {%- set loadloose_res = result.instance.annotation | json_loads_loose -%} - {%- if loadloose_res | type_debug == 'dict' or loadloose_res | type_debug == 'list' -%} - {%- set _ = result.instance.update({'annotation': loadloose_res}) -%} - {%- set _ = res.results.append(result) -%} - {%- endif -%} - {%- endfor -%} - {{ res }} - - - name: get_cluster_hosts_state_esxifree | Set cluster_hosts_state - set_fact: - cluster_hosts_state: "{{ r__vmware_guest_info.results | json_query(\"[].{name: instance.hw_name, regionzone: None, tagslabels: instance.annotation, instance_id: instance.moid, instance_state: instance.hw_power_status}\") }}" - - when: cluster_vars.type == "esxifree" - name: get_cluster_hosts_state | cluster_hosts_state diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml index f04a2b13..dbc99a0a 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml @@ -32,9 +32,6 @@ {{ testloop.is_not_subset }} when: cluster_vars.type == "aws" - - assert: { that: "_scheme_rmvm_keepdisk_only__copy_or_move is defined and _scheme_rmvm_keepdisk_only__copy_or_move in ['copy', 'move']", fail_msg: "ERROR - _scheme_rmvm_keepdisk_only__copy_or_move must be defined and set to either 'copy' or 'move'" } - when: cluster_vars.type == "esxifree" - - assert: { that: "non_current_hosts | length == 0", fail_msg: "ERROR - All VMs must be in the 'current' lifecycle_state. Those not [{{non_current_hosts | join(',')}}]" } vars: { non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" } when: (canary=="start" or canary=="none") diff --git a/redeploy/tasks/main.yml b/redeploy/tasks/main.yml index bb4f62ee..e4788e94 100644 --- a/redeploy/tasks/main.yml +++ b/redeploy/tasks/main.yml @@ -4,7 +4,7 @@ block: - assert: { that: "clean is not defined", msg: "Must not set the 'clean' variable for a redeploy" } - assert: { that: "canary is defined and (canary is defined and canary in ['start', 'finish', 'none', 'tidy', 'revert'])", msg: "Canary must be 'start', 'finish', 'none', 'tidy' or 'revert'" } - - assert: { that: "redeploy_scheme is defined" } + - assert: { that: "redeploy_scheme is defined and redeploy_scheme in redeploy_schemes_supported" } - assert: { that: "cluster_hosts_state | length", msg: "Redeploy only possible with an existing cluster." 
} - name: "Run the {{redeploy_scheme}} redploy scheme" From ef19ec967f526ad2a7ac986ca13ded03d7bd4383 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Mon, 5 Oct 2020 17:29:04 +0100 Subject: [PATCH 35/58] Update ebsmap.py to support NVME instance stores --- EXAMPLE/group_vars/_skel/cluster_vars.yml | 13 +++++---- .../group_vars/test_aws_euw1/cluster_vars.yml | 11 ++++---- _dependencies/library/ebsmap.py | 27 +++++++++++++++++-- 3 files changed, 36 insertions(+), 15 deletions(-) diff --git a/EXAMPLE/group_vars/_skel/cluster_vars.yml b/EXAMPLE/group_vars/_skel/cluster_vars.yml index 4f004bd4..5a1c2b0a 100644 --- a/EXAMPLE/group_vars/_skel/cluster_vars.yml +++ b/EXAMPLE/group_vars/_skel/cluster_vars.yml @@ -71,15 +71,14 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within # rule_desc: "Access from all VMs attached to the {{ cluster_name }}-sg group" # sandbox: # hosttype_vars: -# sys: {vms_by_az: {a: 1, b: 1, c: 1}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} +# sys: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} # #sysdisks2: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} # #sysdisks3: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} -# #sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} -# #hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}] } } -# #hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } -# #hosthdd_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, 
version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } -# #hosthdd_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } -# #hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, ephemeral: 0, encrypted: True, "delete_on_termination": true}]} +# #sysdisks-snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/media/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} +# #hostnvme-multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {"device_name": "/dev/sdf", mountpoint: "/media/mysvc8", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }] } +# #hostnvme-lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# #hosthdd-multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } +# #hosthdd-lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", 
"volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } # aws_access_key: "" # aws_secret_key: "" # vpc_name: "test{{buildenv}}" diff --git a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml index 810f4be2..e063d8c8 100644 --- a/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_aws_euw1/cluster_vars.yml @@ -81,12 +81,11 @@ cluster_vars: sys: {vms_by_az: {a: 1, b: 1, c: 1}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: []} # sysdisks2: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} # sysdisks3: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: t3a.nano, version: "{{sysdisks_version | default('')}}", auto_volumes: [{"device_name": "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true, perms: {owner: "root", group: "sudo", mode: "775"} }, {"device_name": "/dev/sdg", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}, {"device_name": "/dev/sdh", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true}]} -# sysdisks_snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} -# hostnvme_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}] } } -# hostnvme_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdbc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } -# hosthdd_multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } -# hosthdd_lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: 
[{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } -# hostssd: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: c3.large, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdf", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, ephemeral: 0, encrypted: True, "delete_on_termination": true}]} +# sysdisks-snapshot: {vms_by_az: {a: 1, b: 1, c: 0}, flavor: t3a.nano, version: "{{sys_version | default('')}}", auto_volumes: [{"snapshot_tags": {"tag:backup_id": "57180566894481854905"}, "device_name": "/dev/sdf", mountpoint: "/media/data", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }]} +# hostnvme-multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {"device_name": "/dev/sdf", mountpoint: "/media/mysvc8", fstype: "ext4", "volume_type": "gp2", "volume_size": 1, encrypted: True, "delete_on_termination": true }] } +# hostnvme-lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: i3en.2xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } +# hosthdd-multi: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/mysvc2", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/mysvc3", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}] } +# hosthdd-lvm: {vms_by_az: {a: 1, b: 0, c: 0}, flavor: d2.xlarge, version: "{{sys_version | default('')}}", auto_volumes: [{device_name: "/dev/sdb", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral0}, {device_name: "/dev/sdc", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral1}, {device_name: "/dev/sdd", mountpoint: "/media/data", fstype: "ext4", "volume_type": "ephemeral", ephemeral: ephemeral2}], lvmparams: {vg_name: "vg0", lv_name: "lv0", lv_size: "+100%FREE"} } aws_access_key: "" aws_secret_key: "" vpc_name: "test{{buildenv}}" diff --git a/_dependencies/library/ebsmap.py b/_dependencies/library/ebsmap.py index dbd6d717..0cb47d6a 100644 --- a/_dependencies/library/ebsmap.py +++ b/_dependencies/library/ebsmap.py @@ -110,6 +110,11 @@ except: pass +try: + from urllib.request import urlopen +except ImportError: + from urllib2 import urlopen + NVME_ADMIN_IDENTIFY = 0x06 NVME_IOCTL_ADMIN_CMD = 0xC0484E41 
AMZN_NVME_VID = 0x1D0F @@ -243,10 +248,22 @@ def fail_json(self, msg): module = cDummyAnsibleModule() # Get all existing block volumes by key=value, then parse this into a dictionary (which excludes non disk and partition block types, e.g. ram, loop). - lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,PARTLABEL,MOUNTPOINT,SERIAL', '-P']).decode().rstrip().split('\n') + lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,MOUNTPOINT,MODEL,SERIAL', '-P']).decode().rstrip().split('\n') os_device_names = [dict((map(lambda x: x.strip("\""), sub.split("="))) for sub in dev.split('\" ') if '=' in sub) for dev in lsblk_devices] os_device_names = [dev for dev in os_device_names if dev['TYPE'] in ['disk', 'part', 'lvm']] - + os_device_names.sort(key=lambda k: k['NAME']) + + # Instance stores (AKA ephemeral volumes) do not appear to have a defined endpoint that maps between the /dev/sd[b-e] defined in the instance creation map, and the OS /dev/nvme[0-26]n1 device. + # For this scenario, we can only return the instance stores in the order that they are defined. Because instance stores do not survive a poweroff and cannot be detached and reattached, the order doesn't matter as much. + instance_store_map = [] + with urlopen("http://169.254.169.254/latest/meta-data/block-device-mapping/") as response__block_device_mapping: + block_device_mappings = response__block_device_mapping.read().decode().split("\n") + for block_device_mappings__ephemeral_id in [dev for dev in block_device_mappings if dev.startswith('ephemeral')]: + with urlopen("http://169.254.169.254/latest/meta-data/block-device-mapping/" + block_device_mappings__ephemeral_id) as response__ephemeral_device: + block_device_mappings__ephemeral_mapped = response__ephemeral_device.read().decode() + instance_store_map.append({'ephemeral_id': block_device_mappings__ephemeral_id, 'ephemeral_map': block_device_mappings__ephemeral_mapped}) + + instance_store_count = 0 for os_device in os_device_names: os_device_path = "/dev/" + os_device['NAME'] if os_device['NAME'].startswith("nvme"): @@ -254,6 +271,12 @@ def fail_json(self, msg): dev = ebs_nvme_device(os_device_path) except FileNotFoundError as e: module.fail_json(msg=os_device_path + ": FileNotFoundError" + str(e)) + except TypeError as e: + if instance_store_count < len(instance_store_map): + os_device.update({"device_name_os": os_device_path, "device_name_aws": '/dev/' + instance_store_map[instance_store_count]['ephemeral_map'], "volume_id": dev.get_volume_id()}) + instance_store_count += 1 + else: + module.warn(u"%s is not an EBS device and there is no instance store mapping." % os_device_path) except OSError as e: module.warn(u"%s is not an nvme device." 
% os_device_path) else: From db099eb0045d4ef99b9e5516a3a79a3c748f2290 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Mon, 5 Oct 2020 18:33:41 +0100 Subject: [PATCH 36/58] Fix reattach syntax --- create/tasks/aws.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/create/tasks/aws.yml b/create/tasks/aws.yml index 4aee2484..792dcaec 100644 --- a/create/tasks/aws.yml +++ b/create/tasks/aws.yml @@ -30,8 +30,7 @@ region: "{{cluster_vars.region}}" id: "{{item.auto_volume.src.volume_id}}" instance: None - loop: "{{ cluster_hosts_target_denormalised_by_volume }}" - when: "'src' in item.auto_volume" + loop: "{{ cluster_hosts_target_denormalised_by_volume | selectattr('auto_volume.src', 'defined') | list }}" - name: create/aws | Create EC2 VMs asynchronously ec2: @@ -96,7 +95,7 @@ volume_size: "{%- if 'src' not in item.auto_volume -%}{{item.auto_volume.volume_size}}{%- endif -%}" volume_type: "{{item.auto_volume.volume_type}}" delete_on_termination: yes - loop: "{{ cluster_hosts_target_denormalised_by_volume| selectattr('src', 'defined') | list }}" + loop: "{{ cluster_hosts_target_denormalised_by_volume | selectattr('auto_volume.src', 'defined') | list }}" async: 7200 poll: 0 register: r__ec2_vol From c870eb77e9c7caa7247521106242d09d069fcebe Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Tue, 6 Oct 2020 10:25:57 +0100 Subject: [PATCH 37/58] Remove debug that is invalid during redeploy --- config/tasks/disks_auto_aws.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/config/tasks/disks_auto_aws.yml b/config/tasks/disks_auto_aws.yml index bc2f8859..b3626846 100644 --- a/config/tasks/disks_auto_aws.yml +++ b/config/tasks/disks_auto_aws.yml @@ -3,10 +3,6 @@ - name: disks_auto_aws | auto_volumes debug: msg={{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].auto_volumes }} -- name: disks_auto_aws | cluster_hosts_target(inventory_hostname) - debug: msg={{ (cluster_hosts_target | selectattr('hostname', '==', inventory_hostname) | list | first)['auto_volumes'] }} - - - name: disks_auto_aws | Mount volumes as individual disks block: - name: disks_auto_aws | Get the nvme information (pre-filesystem create) From 69de508b7ce68186ed42944aaae03aa97d3d09a2 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Tue, 6 Oct 2020 16:15:21 +0100 Subject: [PATCH 38/58] Add test code to allow creating a file in each mount that identifies where it was supposed to be mounted. 
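+ The test files are only created when the 'test_touch_disks' extra-var is set (e.g. "-e test_touch_disks=true" on the ansible-playbook command line), so normal deploys are unaffected.
+ Each file is named __clusterversetest_<mountpoint>_<device_name> (slashes replaced with underscores; just the mountpoint for LVM volumes), so after a redeploy that re-attaches the disks it is possible to verify that every filesystem came back on its intended mountpoint.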
--- config/tasks/disks_auto_aws.yml | 54 +++++++++++++++++++++++++++----- dynamic_inventory/tasks/main.yml | 4 ++- 2 files changed, 49 insertions(+), 9 deletions(-) diff --git a/config/tasks/disks_auto_aws.yml b/config/tasks/disks_auto_aws.yml index b3626846..479982d9 100644 --- a/config/tasks/disks_auto_aws.yml +++ b/config/tasks/disks_auto_aws.yml @@ -1,7 +1,7 @@ --- -- name: disks_auto_aws | auto_volumes - debug: msg={{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].auto_volumes }} +- name: disks_auto_aws | cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype + debug: msg={{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype] }} - name: disks_auto_aws | Mount volumes as individual disks block: @@ -24,10 +24,10 @@ ebsmap: become: yes register: r__ebsmap - + - name: disks_auto_aws | r__ebsmap (post-filesystem create) debug: msg={{r__ebsmap}} - + - name: disks_auto_aws | Mount created filesytem(s) persistently become: yes mount: @@ -37,7 +37,7 @@ state: mounted opts: _netdev loop: "{{auto_vols}}" - + - name: disks_auto_aws | change ownership of mountpoint (if set) become: yes file: @@ -47,6 +47,26 @@ owner: "{{ item.perms.owner | default(omit)}}" group: "{{ item.perms.group | default(omit)}}" loop: "{{auto_vols}}" + + - block: + - name: disks_auto_aws | Touch a file with the mountpoint and device name for testing that disk attachment is correct + become: yes + file: + path: "{{item.mountpoint}}/__clusterversetest_{{ item.mountpoint | regex_replace('\/', '_') }}_{{ item.device_name | regex_replace('\/', '_') }}" + state: touch + loop: "{{auto_vols}}" + + - name: disks_auto_aws | Find all __clusterversetest_ files in newly mounted disks + find: + paths: "{{item.mountpoint}}" + patterns: "__clusterversetest_*" + loop: "{{auto_vols}}" + register: r__find_test + + - name: disks_auto_aws | Display all __clusterversetest_ files in newly mounted disks. 
+ debug: + msg: "{{ r__find_test | json_query(\"results[].{device_name: item.device_name, mountpoint: item.mountpoint, files: files[].path}\") }}" + when: test_touch_disks is defined and test_touch_disks|bool when: (auto_vols | map(attribute='mountpoint') | list | unique | count == auto_vols | map(attribute='mountpoint') | list | count) vars: auto_vols: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].auto_volumes }}" @@ -88,18 +108,36 @@ - name: disks_auto_aws | Create filesystem(s) on attached nvme volume(s) become: yes filesystem: - fstype: "{{ hosttype_vars.auto_volumes | map(attribute='fstype') | list | unique | join('') }}" + fstype: "{{ hosttype_vars.auto_volumes[0].fstype }}" dev: "/dev/{{ hosttype_vars.lvmparams.vg_name }}/{{ hosttype_vars.lvmparams.lv_name }}" force: no - name: disks_auto_aws | Mount created filesytem(s) persistently become: yes mount: - path: "{{ hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | unique | join('') }}" + path: "{{ hosttype_vars.auto_volumes[0].mountpoint }}" src: "/dev/{{ hosttype_vars.lvmparams.vg_name }}/{{ hosttype_vars.lvmparams.lv_name }}" - fstype: "{{ hosttype_vars.auto_volumes | map(attribute='fstype') | list | unique | join('') }}" + fstype: "{{ hosttype_vars.auto_volumes[0].fstype }}" state: mounted opts: _netdev + + - block: + - name: disks_auto_aws | Touch a file with the mountpoint and device name for testing that disk attachment is correct + become: yes + file: + path: "{{ hosttype_vars.auto_volumes[0].mountpoint }}/__clusterversetest_{{ hosttype_vars.auto_volumes[0].mountpoint | regex_replace('\/', '_') }}" + state: touch + + - name: disks_auto_aws | Find all __clusterversetest_ files in newly mounted disks + find: + paths: "{{ hosttype_vars.auto_volumes[0].mountpoint }}" + patterns: "__clusterversetest_*" + register: r__find_test + + - name: disks_auto_aws | Display all __clusterversetest_ files in newly mounted disks. + debug: + msg: "{{ r__find_test | json_query(\"files[].path\") }}" + when: test_touch_disks is defined and test_touch_disks|bool when: ('lvmparams' in hosttype_vars) and (hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | unique | count == 1) and (hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | count >= 2) and (hosttype_vars.auto_volumes | map(attribute='fstype') | list | unique | count == 1) vars: hosttype_vars: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype] }}" diff --git a/dynamic_inventory/tasks/main.yml b/dynamic_inventory/tasks/main.yml index 4c041104..8647b8e2 100644 --- a/dynamic_inventory/tasks/main.yml +++ b/dynamic_inventory/tasks/main.yml @@ -14,7 +14,9 @@ ping: delegate_to: "{{ item.inventory_ip }}" with_items: "{{ dynamic_inventory_flat }}" - retries: 12 + register: r__ping + until: r__ping is success + retries: 5 - name: dynamic_inventory | Refresh (clean it, because there is no file or plugin inventory defined) the in-memory inventory prior to building it (this is in case this module is called multiple times, and we otherwise only add hosts to existing inventory) meta: refresh_inventory From c94299b47f61e4f53763f0f1929f2333288495f1 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Thu, 22 Oct 2020 16:36:45 +0100 Subject: [PATCH 39/58] Fix missing jinja2 endfor in disks_auto_generic.yml (i.e. 
GCP disk mounting) --- config/tasks/disks_auto_generic.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/config/tasks/disks_auto_generic.yml b/config/tasks/disks_auto_generic.yml index cde01eac..4c30ee1e 100644 --- a/config/tasks/disks_auto_generic.yml +++ b/config/tasks/disks_auto_generic.yml @@ -33,6 +33,7 @@ {%- set blkvolloop.break = true -%} {%- endif -%} {%- endfor -%} + {%- endfor -%} {{ res }} #- name: disks_auto_generic | hostvols From 55cf57cebb5f3d6b12cad3e511fd42da124fb764 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Sun, 25 Oct 2020 12:12:52 +0000 Subject: [PATCH 40/58] Add GCP functionality for _scheme_rmvm_keepdisk_rollback. + Also add disk labelling for GCP. --- _dependencies/library/blockdevmap.py | 347 ++++++++++++++++++ .../{ebsmap__LICENSE => blockdevmap_LICENSE} | 6 + _dependencies/library/blockdevmap_README.md | 44 +++ _dependencies/library/ebsmap.py | 293 --------------- _dependencies/library/ebsmap__README.md | 24 -- .../tasks/get_cluster_hosts_target.yml | 22 +- ...ks_auto_aws.yml => disks_auto_aws_gcp.yml} | 83 +++-- config/tasks/disks_auto_generic.yml | 35 +- config/tasks/main.yml | 12 +- create/tasks/gcp.yml | 137 ++++++- ..._diskinfo_to_cluster_hosts_target__aws.yml | 8 +- ..._diskinfo_to_cluster_hosts_target__gcp.yml | 39 ++ .../tasks/preflight.yml | 2 +- 13 files changed, 639 insertions(+), 413 deletions(-) create mode 100644 _dependencies/library/blockdevmap.py rename _dependencies/library/{ebsmap__LICENSE => blockdevmap_LICENSE} (94%) create mode 100644 _dependencies/library/blockdevmap_README.md delete mode 100644 _dependencies/library/ebsmap.py delete mode 100644 _dependencies/library/ebsmap__README.md rename config/tasks/{disks_auto_aws.yml => disks_auto_aws_gcp.yml} (50%) create mode 100644 redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__gcp.yml diff --git a/_dependencies/library/blockdevmap.py b/_dependencies/library/blockdevmap.py new file mode 100644 index 00000000..3581f437 --- /dev/null +++ b/_dependencies/library/blockdevmap.py @@ -0,0 +1,347 @@ +# Copyright 2020 Dougal Seeley +# https://github.com/dseeley/blockdevmap + +# Copyright 2017 Amazon.com, Inc. and its affiliates. All Rights Reserved. +# Licensed under the MIT License. See the LICENSE accompanying this file +# for the specific language governing permissions and limitations under +# the License. +# /sbin/ebsnvme-id - https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/nvme-ebs-volumes.html + +from __future__ import (absolute_import, division, print_function) + +__metaclass__ = type + +DOCUMENTATION = ''' +--- +module: blockdevmap +version_added: 1.0.0 +short_description: blockdevmap +description: + - Map the block device name as defined in AWS/GCP (e.g. /dev/sdf) with the volume provided to the OS +authors: + - Dougal Seeley + - Amazon.com Inc. 
+''' + +EXAMPLES = ''' +- name: Get block device map information for GCP + blockdevmap: + cloud_type: gcp + become: yes + register: r__blockdevmap + +- name: Get block device map information for AWS + blockdevmap: + cloud_type: aws + become: yes + register: r__blockdevmap + +- name: Get lsblk device map information + blockdevmap: + cloud_type: lsblk + become: yes + register: r__blockdevmap + +- name: debug blockdevmap + debug: msg={{r__blockdevmap}} +''' + +RETURN = ''' +"device_map": [ + { + "FSTYPE": "ext4", + "MOUNTPOINT": "/media/mysvc", + "NAME": "nvme1n1", + "PARTLABEL": "", + "SERIAL": "vol0c2c47ee4516063e9", + "TYPE": "disk", + "UUID": "c3630dbe-042e-44e5-ac67-54fa1c9e4cd2", + "device_name_cloud": "/dev/sdf", + "device_name_os": "/dev/nvme1n1", + "volume_id": "vol-0c2c47ee4516063e9" + }, + { + "FSTYPE": "", + "MOUNTPOINT": "", + "NAME": "nvme0n1", + "PARTLABEL": "", + "SERIAL": "vol0b05e48d5677db81a", + "TYPE": "disk", + "UUID": "", + "device_name_cloud": "/dev/sda1", + "device_name_os": "/dev/nvme0n1", + "volume_id": "vol-0b05e48d5677db81a" + }, + { + "FSTYPE": "ext4", + "MOUNTPOINT": "/", + "NAME": "nvme0n1p1", + "PARTLABEL": "", + "SERIAL": "", + "TYPE": "part", + "UUID": "96ec7adb-9d94-41c0-96a5-d6992c9d5f20", + "device_name_cloud": "/dev/sda1", + "device_name_os": "/dev/nvme0n1p1", + "volume_id": "vol-0b05e48d5677db81a" + } + +"device_map": [ + { + "FSTYPE": "", + "MOUNTPOINT": "", + "NAME": "xvda", + "PARTLABEL": "", + "SERIAL": "", + "TYPE": "disk", + "UUID": "", + "device_name_cloud": "/dev/sda", + "device_name_os": "/dev/xvda" + }, + { + "FSTYPE": "ext4", + "MOUNTPOINT": "/", + "NAME": "xvda1", + "PARTLABEL": "", + "SERIAL": "", + "TYPE": "part", + "UUID": "96ec7adb-9d94-41c0-96a5-d6992c9d5f20", + "device_name_cloud": "/dev/sda1", + "device_name_os": "/dev/xvda1" + } +''' + +from ctypes import * +from fcntl import ioctl +import subprocess +import sys +import json +import re + +try: + from ansible.module_utils.basic import AnsibleModule + from ansible.errors import AnsibleError + from ansible.utils.display import Display +except: + pass + +try: + from urllib.request import urlopen +except ImportError: + from urllib2 import urlopen + +NVME_ADMIN_IDENTIFY = 0x06 +NVME_IOCTL_ADMIN_CMD = 0xC0484E41 +AMZN_NVME_VID = 0x1D0F +AMZN_NVME_EBS_MN = "Amazon Elastic Block Store" + + +class nvme_admin_command(Structure): + _pack_ = 1 + _fields_ = [("opcode", c_uint8), # op code + ("flags", c_uint8), # fused operation + ("cid", c_uint16), # command id + ("nsid", c_uint32), # namespace id + ("reserved0", c_uint64), + ("mptr", c_uint64), # metadata pointer + ("addr", c_uint64), # data pointer + ("mlen", c_uint32), # metadata length + ("alen", c_uint32), # data length + ("cdw10", c_uint32), + ("cdw11", c_uint32), + ("cdw12", c_uint32), + ("cdw13", c_uint32), + ("cdw14", c_uint32), + ("cdw15", c_uint32), + ("reserved1", c_uint64)] + + +class nvme_identify_controller_amzn_vs(Structure): + _pack_ = 1 + _fields_ = [("bdev", c_char * 32), # block device name + ("reserved0", c_char * (1024 - 32))] + + +class nvme_identify_controller_psd(Structure): + _pack_ = 1 + _fields_ = [("mp", c_uint16), # maximum power + ("reserved0", c_uint16), + ("enlat", c_uint32), # entry latency + ("exlat", c_uint32), # exit latency + ("rrt", c_uint8), # relative read throughput + ("rrl", c_uint8), # relative read latency + ("rwt", c_uint8), # relative write throughput + ("rwl", c_uint8), # relative write latency + ("reserved1", c_char * 16)] + + +class nvme_identify_controller(Structure): + _pack_ = 1 + _fields_ = 
[("vid", c_uint16), # PCI Vendor ID + ("ssvid", c_uint16), # PCI Subsystem Vendor ID + ("sn", c_char * 20), # Serial Number + ("mn", c_char * 40), # Module Number + ("fr", c_char * 8), # Firmware Revision + ("rab", c_uint8), # Recommend Arbitration Burst + ("ieee", c_uint8 * 3), # IEEE OUI Identifier + ("mic", c_uint8), # Multi-Interface Capabilities + ("mdts", c_uint8), # Maximum Data Transfer Size + ("reserved0", c_uint8 * (256 - 78)), + ("oacs", c_uint16), # Optional Admin Command Support + ("acl", c_uint8), # Abort Command Limit + ("aerl", c_uint8), # Asynchronous Event Request Limit + ("frmw", c_uint8), # Firmware Updates + ("lpa", c_uint8), # Log Page Attributes + ("elpe", c_uint8), # Error Log Page Entries + ("npss", c_uint8), # Number of Power States Support + ("avscc", c_uint8), # Admin Vendor Specific Command Configuration + ("reserved1", c_uint8 * (512 - 265)), + ("sqes", c_uint8), # Submission Queue Entry Size + ("cqes", c_uint8), # Completion Queue Entry Size + ("reserved2", c_uint16), + ("nn", c_uint32), # Number of Namespaces + ("oncs", c_uint16), # Optional NVM Command Support + ("fuses", c_uint16), # Fused Operation Support + ("fna", c_uint8), # Format NVM Attributes + ("vwc", c_uint8), # Volatile Write Cache + ("awun", c_uint16), # Atomic Write Unit Normal + ("awupf", c_uint16), # Atomic Write Unit Power Fail + ("nvscc", c_uint8), # NVM Vendor Specific Command Configuration + ("reserved3", c_uint8 * (704 - 531)), + ("reserved4", c_uint8 * (2048 - 704)), + ("psd", nvme_identify_controller_psd * 32), # Power State Descriptor + ("vs", nvme_identify_controller_amzn_vs)] # Vendor Specific + + +class cBlockDevMap(object): + def __init__(self, module, **kwds): + self.module = module + self.device_map = [] + + def get_lsblk(self): + # Get all existing block volumes by key=value, then parse this into a dictionary (which excludes non disk and partition block types, e.g. ram, loop). + lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,MOUNTPOINT,MODEL,PTTYPE,SERIAL,SIZE', '-P', '-b']).decode().rstrip().split('\n') + os_device_names = [dict((map(lambda x: x.strip("\""), sub.split("="))) for sub in dev.split('\" ') if '=' in sub) for dev in lsblk_devices] + os_device_names = [dev for dev in os_device_names if dev['TYPE'] in ['disk', 'part', 'lvm']] + os_device_names.sort(key=lambda k: k['NAME']) + return os_device_names + + +class cLsblkMapper(cBlockDevMap): + def __init__(self, **kwds): + super().__init__(**kwds) + + self.device_map = self.get_lsblk() + for os_device in self.device_map: + os_device.update({"device_name_os": "/dev/" + os_device['NAME'], "device_name_cloud": ""}) + + +class cGCPMapper(cBlockDevMap): + def __init__(self, **kwds): + super().__init__(**kwds) + + self.device_map = self.get_lsblk() + + for os_device in self.device_map: + os_device.update({"device_name_os": "/dev/" + os_device['NAME'], "device_name_cloud": os_device['SERIAL']}) + + +class cAwsMapper(cBlockDevMap): + def __init__(self, **kwds): + super().__init__(**kwds) + # Instance stores (AKA ephemeral volumes) do not appear to have a defined endpoint that maps between the /dev/sd[b-e] defined in the instance creation map, and the OS /dev/nvme[0-26]n1 device. + # For this scenario, we can only return the instance stores in the order that they are defined. Because instance stores do not survive a poweroff and cannot be detached and reattached, the order doesn't matter as much. 
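+        # (Illustrative, not exhaustive: the metadata service returns the creation-time mapping, e.g. requesting
+        # .../block-device-mapping/ephemeral0 typically returns a bare device name such as "sdb", to which '/dev/' is prepended below.)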
+ instance_store_map = [] + with urlopen("http://169.254.169.254/latest/meta-data/block-device-mapping/") as response__block_device_mapping: + block_device_mappings = response__block_device_mapping.read().decode().split("\n") + for block_device_mappings__ephemeral_id in [dev for dev in block_device_mappings if dev.startswith('ephemeral')]: + with urlopen("http://169.254.169.254/latest/meta-data/block-device-mapping/" + block_device_mappings__ephemeral_id) as response__ephemeral_device: + block_device_mappings__ephemeral_mapped = response__ephemeral_device.read().decode() + instance_store_map.append({'ephemeral_id': block_device_mappings__ephemeral_id, 'ephemeral_map': block_device_mappings__ephemeral_mapped}) + + instance_store_count = 0 + self.device_map = self.get_lsblk() + for os_device in self.device_map: + os_device_path = "/dev/" + os_device['NAME'] + if os_device['NAME'].startswith("nvme"): + try: + dev = cAwsMapper.ebs_nvme_device(os_device_path) + except FileNotFoundError as e: + self.module.fail_json(msg=os_device_path + ": FileNotFoundError" + str(e)) + except TypeError as e: + if instance_store_count < len(instance_store_map): + os_device.update({"device_name_os": os_device_path, "device_name_cloud": '/dev/' + instance_store_map[instance_store_count]['ephemeral_map'], "volume_id": dev.get_volume_id()}) + instance_store_count += 1 + else: + self.module.warn(u"%s is not an EBS device and there is no instance store mapping." % os_device_path) + except OSError as e: + self.module.warn(u"%s is not an nvme device." % os_device_path) + else: + os_device.update({"device_name_os": os_device_path, "device_name_cloud": '/dev/' + dev.get_block_device(stripped=True).rstrip(), "volume_id": dev.get_volume_id()}) + elif os_device['NAME'].startswith("xvd"): + os_device.update({"device_name_os": os_device_path, "device_name_cloud": '/dev/' + re.sub(r'xvd(.*)', r'sd\1', os_device['NAME'])}) + else: + os_device.update({"device_name_os": os_device_path, "device_name_cloud": ""}) + + class ebs_nvme_device(): + def __init__(self, device): + self.device = device + self.ctrl_identify() + + def _nvme_ioctl(self, id_response, id_len): + admin_cmd = nvme_admin_command(opcode=NVME_ADMIN_IDENTIFY, addr=id_response, alen=id_len, cdw10=1) + with open(self.device, "rt") as nvme: + ioctl(nvme, NVME_IOCTL_ADMIN_CMD, admin_cmd) + + def ctrl_identify(self): + self.id_ctrl = nvme_identify_controller() + self._nvme_ioctl(addressof(self.id_ctrl), sizeof(self.id_ctrl)) + if self.id_ctrl.vid != AMZN_NVME_VID or self.id_ctrl.mn.decode().strip() != AMZN_NVME_EBS_MN: + raise TypeError("[ERROR] Not an EBS device: '{0}'".format(self.device)) + + def get_volume_id(self): + vol = self.id_ctrl.sn.decode() + if vol.startswith("vol") and vol[3] != "-": + vol = "vol-" + vol[3:] + return vol + + def get_block_device(self, stripped=False): + device = self.id_ctrl.vs.bdev.decode() + if stripped and device.startswith("/dev/"): + device = device[5:] + return device + + +def main(): + if not (len(sys.argv) > 1 and sys.argv[1] == "console"): + module = AnsibleModule(argument_spec={"cloud_type": {"type": "str", "required": True, "choices": ['aws', 'gcp', 'lsblk']}}, supports_check_mode=True) + else: + # For testing without Ansible (e.g on Windows) + class cDummyAnsibleModule(): + params = {"cloud_type": "aws"} + + def exit_json(self, changed, **kwargs): + print(changed, json.dumps(kwargs, sort_keys=True, indent=4, separators=(',', ': '))) + + def warn(self, msg): + print("[WARNING]: " + msg) + + def fail_json(self, msg): + print("Failed: 
" + msg) + exit(1) + + module = cDummyAnsibleModule() + + if module.params['cloud_type'] == 'aws': + blockdevmap = cAwsMapper(module=module) + elif module.params['cloud_type'] == 'gcp': + blockdevmap = cGCPMapper(module=module) + elif module.params['cloud_type'] == 'lsblk': + blockdevmap = cLsblkMapper(module=module) + else: + module.fail_json(msg="cloud_type not valid :" + module.params['cloud_type']) + + module.exit_json(changed=False, device_map=blockdevmap.device_map) + + +if __name__ == '__main__': + main() diff --git a/_dependencies/library/ebsmap__LICENSE b/_dependencies/library/blockdevmap_LICENSE similarity index 94% rename from _dependencies/library/ebsmap__LICENSE rename to _dependencies/library/blockdevmap_LICENSE index 55138771..7d404386 100644 --- a/_dependencies/library/ebsmap__LICENSE +++ b/_dependencies/library/blockdevmap_LICENSE @@ -1,3 +1,6 @@ +--- +## For the blockdevmap.py script: + BSD 3-Clause License Copyright (c) 2020, Dougal Seeley @@ -30,6 +33,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. --- +## For the parts of blockdevmap.py derived from the /sbin/ebsnvme-id (Amazon.com, Inc) script: + +MIT License Copyright 2017 Amazon.com, Inc. and its affiliates. All Rights Reserved. diff --git a/_dependencies/library/blockdevmap_README.md b/_dependencies/library/blockdevmap_README.md new file mode 100644 index 00000000..7f9b8fb7 --- /dev/null +++ b/_dependencies/library/blockdevmap_README.md @@ -0,0 +1,44 @@ +# blockdevmap +This is an Ansible module that is able to map AWS and GCP device names to the host device names. It returns a dictionary, derived from Linux `lsblk`, (augmented in the case of AWS with results from elsewhere). + +### AWS ++ On AWS 'nitro' instances all EBS mappings are attached to the NVME controller. The nvme mapping is non-deterministic though, so the script uses ioctl commands to query the nvme controller (from a script by Amazon that is present on 'Amazon Linux' machines: `/sbin/ebsnvme-id`. See documentation: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/nvme-ebs-volumes). ++ For non-nitro EBS mapping, the script enumerates the mapping in the alphanumerical order of the disk device names. This is the correct order except for some very old RHEL/Centos AMIs, which are not supported. ++ For ephemeral volume mapping, it uses the http://169.254.169.254/latest/meta-data/block-device-mapping/ endpoint. + +### GCP ++ GCP device names are user-defined, and appear as entries in the `lsblk` _SERIAL_ column, mapped to the `lsblk` _NAME_ column. + +### lsblk ++ The script can be run as plain `lsblk` command, where the cloud provider does not include a mapping, and will return the information as a dictionary. For example, the _bytes_ mapped to the _NAME_ field could be cross-checked against the requested disk size to create a mapping. 
+ + +## Execution +This can be run as an Ansible module (needs root): +```yaml +- name: Get block device map information for GCP + blockdevmap: + cloud_type: gcp + become: yes + register: r__blockdevmap + +- name: Get block device map information for AWS + blockdevmap: + cloud_type: aws + become: yes + register: r__blockdevmap + +- name: Get lsblk device map information + blockdevmap: + cloud_type: lsblk + become: yes + register: r__blockdevmap + +- name: debug blockdevmap + debug: msg={{r__blockdevmap}} +``` + +or from the console: +```bash +python3 ./blockdevmap.py console +``` \ No newline at end of file diff --git a/_dependencies/library/ebsmap.py b/_dependencies/library/ebsmap.py deleted file mode 100644 index 0cb47d6a..00000000 --- a/_dependencies/library/ebsmap.py +++ /dev/null @@ -1,293 +0,0 @@ -# Copyright 2020 Dougal Seeley -# https://github.com/dseeley/ebsmap - -# Copyright 2017 Amazon.com, Inc. and its affiliates. All Rights Reserved. -# Licensed under the MIT License. See the LICENSE accompanying this file -# for the specific language governing permissions and limitations under -# the License. - -from __future__ import (absolute_import, division, print_function) - -__metaclass__ = type - -DOCUMENTATION = ''' ---- -module: ebsmap -version_added: 1.0.0 -short_description: ebsmap -description: - - Map the EBS device name as defined in AWS (e.g. /dev/sdf) with the volume provided to the OS -author: - - Dougal Seeley - - Amazon.com inc. -''' - -EXAMPLES = ''' -- name: Get the nvme map information - ebsmap: - become: yes - register: r__ebsmap - -- name: ebsmap - debug: msg={{ebsmap}} -''' - -RETURN = ''' -"device_map": [ - { - "FSTYPE": "ext4", - "MOUNTPOINT": "/media/mysvc", - "NAME": "nvme1n1", - "PARTLABEL": "", - "SERIAL": "vol0c2c47ee4516063e9", - "TYPE": "disk", - "UUID": "c3630dbe-042e-44e5-ac67-54fa1c9e4cd2", - "device_name_aws": "/dev/sdf", - "device_name_os": "/dev/nvme1n1", - "volume_id": "vol-0c2c47ee4516063e9" - }, - { - "FSTYPE": "", - "MOUNTPOINT": "", - "NAME": "nvme0n1", - "PARTLABEL": "", - "SERIAL": "vol0b05e48d5677db81a", - "TYPE": "disk", - "UUID": "", - "device_name_aws": "/dev/sda1", - "device_name_os": "/dev/nvme0n1", - "volume_id": "vol-0b05e48d5677db81a" - }, - { - "FSTYPE": "ext4", - "MOUNTPOINT": "/", - "NAME": "nvme0n1p1", - "PARTLABEL": "", - "SERIAL": "", - "TYPE": "part", - "UUID": "96ec7adb-9d94-41c0-96a5-d6992c9d5f20", - "device_name_aws": "/dev/sda1", - "device_name_os": "/dev/nvme0n1p1", - "volume_id": "vol-0b05e48d5677db81a" - } - -"device_map": [ - { - "FSTYPE": "", - "MOUNTPOINT": "", - "NAME": "xvda", - "PARTLABEL": "", - "SERIAL": "", - "TYPE": "disk", - "UUID": "", - "device_name_aws": "/dev/sda", - "device_name_os": "/dev/xvda" - }, - { - "FSTYPE": "ext4", - "MOUNTPOINT": "/", - "NAME": "xvda1", - "PARTLABEL": "", - "SERIAL": "", - "TYPE": "part", - "UUID": "96ec7adb-9d94-41c0-96a5-d6992c9d5f20", - "device_name_aws": "/dev/sda1", - "device_name_os": "/dev/xvda1" - } -''' - -from ctypes import * -from fcntl import ioctl -import subprocess -import sys -import json -import re - -try: - from ansible.module_utils.basic import AnsibleModule - from ansible.errors import AnsibleError - from ansible.utils.display import Display -except: - pass - -try: - from urllib.request import urlopen -except ImportError: - from urllib2 import urlopen - -NVME_ADMIN_IDENTIFY = 0x06 -NVME_IOCTL_ADMIN_CMD = 0xC0484E41 -AMZN_NVME_VID = 0x1D0F -AMZN_NVME_EBS_MN = "Amazon Elastic Block Store" - - -class nvme_admin_command(Structure): - _pack_ = 1 - _fields_ = 
[("opcode", c_uint8), # op code - ("flags", c_uint8), # fused operation - ("cid", c_uint16), # command id - ("nsid", c_uint32), # namespace id - ("reserved0", c_uint64), - ("mptr", c_uint64), # metadata pointer - ("addr", c_uint64), # data pointer - ("mlen", c_uint32), # metadata length - ("alen", c_uint32), # data length - ("cdw10", c_uint32), - ("cdw11", c_uint32), - ("cdw12", c_uint32), - ("cdw13", c_uint32), - ("cdw14", c_uint32), - ("cdw15", c_uint32), - ("reserved1", c_uint64)] - - -class nvme_identify_controller_amzn_vs(Structure): - _pack_ = 1 - _fields_ = [("bdev", c_char * 32), # block device name - ("reserved0", c_char * (1024 - 32))] - - -class nvme_identify_controller_psd(Structure): - _pack_ = 1 - _fields_ = [("mp", c_uint16), # maximum power - ("reserved0", c_uint16), - ("enlat", c_uint32), # entry latency - ("exlat", c_uint32), # exit latency - ("rrt", c_uint8), # relative read throughput - ("rrl", c_uint8), # relative read latency - ("rwt", c_uint8), # relative write throughput - ("rwl", c_uint8), # relative write latency - ("reserved1", c_char * 16)] - - -class nvme_identify_controller(Structure): - _pack_ = 1 - _fields_ = [("vid", c_uint16), # PCI Vendor ID - ("ssvid", c_uint16), # PCI Subsystem Vendor ID - ("sn", c_char * 20), # Serial Number - ("mn", c_char * 40), # Module Number - ("fr", c_char * 8), # Firmware Revision - ("rab", c_uint8), # Recommend Arbitration Burst - ("ieee", c_uint8 * 3), # IEEE OUI Identifier - ("mic", c_uint8), # Multi-Interface Capabilities - ("mdts", c_uint8), # Maximum Data Transfer Size - ("reserved0", c_uint8 * (256 - 78)), - ("oacs", c_uint16), # Optional Admin Command Support - ("acl", c_uint8), # Abort Command Limit - ("aerl", c_uint8), # Asynchronous Event Request Limit - ("frmw", c_uint8), # Firmware Updates - ("lpa", c_uint8), # Log Page Attributes - ("elpe", c_uint8), # Error Log Page Entries - ("npss", c_uint8), # Number of Power States Support - ("avscc", c_uint8), # Admin Vendor Specific Command Configuration - ("reserved1", c_uint8 * (512 - 265)), - ("sqes", c_uint8), # Submission Queue Entry Size - ("cqes", c_uint8), # Completion Queue Entry Size - ("reserved2", c_uint16), - ("nn", c_uint32), # Number of Namespaces - ("oncs", c_uint16), # Optional NVM Command Support - ("fuses", c_uint16), # Fused Operation Support - ("fna", c_uint8), # Format NVM Attributes - ("vwc", c_uint8), # Volatile Write Cache - ("awun", c_uint16), # Atomic Write Unit Normal - ("awupf", c_uint16), # Atomic Write Unit Power Fail - ("nvscc", c_uint8), # NVM Vendor Specific Command Configuration - ("reserved3", c_uint8 * (704 - 531)), - ("reserved4", c_uint8 * (2048 - 704)), - ("psd", nvme_identify_controller_psd * 32), # Power State Descriptor - ("vs", nvme_identify_controller_amzn_vs)] # Vendor Specific - - -class ebs_nvme_device: - def __init__(self, device): - self.device = device - self.ctrl_identify() - - def _nvme_ioctl(self, id_response, id_len): - admin_cmd = nvme_admin_command(opcode=NVME_ADMIN_IDENTIFY, addr=id_response, alen=id_len, cdw10=1) - with open(self.device, "rt") as nvme: - ioctl(nvme, NVME_IOCTL_ADMIN_CMD, admin_cmd) - - def ctrl_identify(self): - self.id_ctrl = nvme_identify_controller() - self._nvme_ioctl(addressof(self.id_ctrl), sizeof(self.id_ctrl)) - if self.id_ctrl.vid != AMZN_NVME_VID or self.id_ctrl.mn.decode().strip() != AMZN_NVME_EBS_MN: - raise TypeError("[ERROR] Not an EBS device: '{0}'".format(self.device)) - - def get_volume_id(self): - vol = self.id_ctrl.sn.decode() - if vol.startswith("vol") and vol[3] != "-": - vol = 
"vol-" + vol[3:] - return vol - - def get_block_device(self, stripped=False): - device = self.id_ctrl.vs.bdev.decode() - if stripped and device.startswith("/dev/"): - device = device[5:] - return device - - -def main(): - if not (len(sys.argv) > 1 and sys.argv[1] == "console"): - module = AnsibleModule(argument_spec={}, supports_check_mode=True) - else: - # For testing without Ansible (e.g on Windows) - class cDummyAnsibleModule(): - params = {} - - def exit_json(self, changed, **kwargs): - print(changed, json.dumps(kwargs, sort_keys=True, indent=4, separators=(',', ': '))) - - def warn(self, msg): - print("[WARNING]: " + msg) - - def fail_json(self, msg): - print("Failed: " + msg) - exit(1) - - module = cDummyAnsibleModule() - - # Get all existing block volumes by key=value, then parse this into a dictionary (which excludes non disk and partition block types, e.g. ram, loop). - lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,MOUNTPOINT,MODEL,SERIAL', '-P']).decode().rstrip().split('\n') - os_device_names = [dict((map(lambda x: x.strip("\""), sub.split("="))) for sub in dev.split('\" ') if '=' in sub) for dev in lsblk_devices] - os_device_names = [dev for dev in os_device_names if dev['TYPE'] in ['disk', 'part', 'lvm']] - os_device_names.sort(key=lambda k: k['NAME']) - - # Instance stores (AKA ephemeral volumes) do not appear to have a defined endpoint that maps between the /dev/sd[b-e] defined in the instance creation map, and the OS /dev/nvme[0-26]n1 device. - # For this scenario, we can only return the instance stores in the order that they are defined. Because instance stores do not survive a poweroff and cannot be detached and reattached, the order doesn't matter as much. - instance_store_map = [] - with urlopen("http://169.254.169.254/latest/meta-data/block-device-mapping/") as response__block_device_mapping: - block_device_mappings = response__block_device_mapping.read().decode().split("\n") - for block_device_mappings__ephemeral_id in [dev for dev in block_device_mappings if dev.startswith('ephemeral')]: - with urlopen("http://169.254.169.254/latest/meta-data/block-device-mapping/" + block_device_mappings__ephemeral_id) as response__ephemeral_device: - block_device_mappings__ephemeral_mapped = response__ephemeral_device.read().decode() - instance_store_map.append({'ephemeral_id': block_device_mappings__ephemeral_id, 'ephemeral_map': block_device_mappings__ephemeral_mapped}) - - instance_store_count = 0 - for os_device in os_device_names: - os_device_path = "/dev/" + os_device['NAME'] - if os_device['NAME'].startswith("nvme"): - try: - dev = ebs_nvme_device(os_device_path) - except FileNotFoundError as e: - module.fail_json(msg=os_device_path + ": FileNotFoundError" + str(e)) - except TypeError as e: - if instance_store_count < len(instance_store_map): - os_device.update({"device_name_os": os_device_path, "device_name_aws": '/dev/' + instance_store_map[instance_store_count]['ephemeral_map'], "volume_id": dev.get_volume_id()}) - instance_store_count += 1 - else: - module.warn(u"%s is not an EBS device and there is no instance store mapping." % os_device_path) - except OSError as e: - module.warn(u"%s is not an nvme device." 
% os_device_path) - else: - os_device.update({"device_name_os": os_device_path, "device_name_aws": '/dev/' + dev.get_block_device(stripped=True).rstrip(), "volume_id": dev.get_volume_id()}) - elif os_device['NAME'].startswith("xvd"): - os_device.update({"device_name_os": os_device_path, "device_name_aws": '/dev/' + re.sub(r'xvd(.*)', r'sd\1', os_device['NAME'])}) - else: - os_device.update({"device_name_os": os_device_path, "device_name_aws": ""}) - - module.exit_json(changed=False, device_map=os_device_names) - - -if __name__ == '__main__': - main() diff --git a/_dependencies/library/ebsmap__README.md b/_dependencies/library/ebsmap__README.md deleted file mode 100644 index f38b360c..00000000 --- a/_dependencies/library/ebsmap__README.md +++ /dev/null @@ -1,24 +0,0 @@ -# ebsmap - -This is an Ansible module that is able to map AWS EBS device names (including NVME devices) to the host device names. - -## Credits -The bulk of the heavy lifting is nvme ioctl commands written by AWS for their Amazon Linux AMIs. See: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/nvme-ebs-volumes.html - -## Execution -This can be run as an Ansible module (needs root): -```yaml -- name: Get the nvme map information - ebsmap: - become: yes - register: r__ebsmap - -- name: ebsmap - debug: msg={{ebsmap}} - -``` - -or from the console: -```bash -python3 ./ebsmap.py console -``` \ No newline at end of file diff --git a/cluster_hosts/tasks/get_cluster_hosts_target.yml b/cluster_hosts/tasks/get_cluster_hosts_target.yml index 4dad737e..9b5f4782 100644 --- a/cluster_hosts/tasks/get_cluster_hosts_target.yml +++ b/cluster_hosts/tasks/get_cluster_hosts_target.yml @@ -25,7 +25,7 @@ # Dynamically look up VPC ID by name from aws - name: get_cluster_hosts_target | Looking up VPC facts to extract ID ec2_vpc_net_info: - region: "{{ cluster_vars.region }}" + region: "{{ cluster_vars.region }}" aws_access_key: "{{ cluster_vars[buildenv].aws_access_key }}" aws_secret_key: "{{ cluster_vars[buildenv].aws_secret_key }}" filters: @@ -40,11 +40,11 @@ - name: get_cluster_hosts_target/aws | Look up proxy subnet facts ec2_vpc_subnet_info: - region: "{{ cluster_vars.region }}" + region: "{{ cluster_vars.region }}" aws_access_key: "{{ cluster_vars[buildenv].aws_access_key }}" aws_secret_key: "{{ cluster_vars[buildenv].aws_secret_key }}" filters: - vpc-id: "{{ vpc_id }}" + vpc-id: "{{ vpc_id }}" register: r__ec2_vpc_subnet_info delegate_to: localhost run_once: true @@ -103,14 +103,18 @@ - name: get_cluster_hosts_target/gcp | GCP-specific modifications to cluster_hosts_target block: - - name: get_cluster_hosts_target/gcp | Update cluster_hosts_target with rootvol_size + - name: get_cluster_hosts_target/gcp | Update cluster_hosts_target auto_volumes with device_name and initialize_params set_fact: - cluster_hosts_target: | - {% set res = cluster_hosts_target -%} - {%- for host in res -%} - {%- set _dummy = host.update({'rootvol_size': cluster_vars[buildenv].hosttype_vars[host.hosttype].rootvol_size | string}) -%} + cluster_hosts_target: |- + {%- for host in cluster_hosts_target -%} + {%- for vol in host.auto_volumes -%} + {%- if 'device_name' not in vol -%} + {%- set _dummy = vol.update({'device_name': host.hostname + '--' + vol.mountpoint | basename }) -%} + {%- set _dummy = vol.update({'initialize_params': {'disk_name': vol.device_name, 'disk_size_gb': vol.volume_size}}) -%} + {%- endif -%} + {%- endfor %} {%- endfor %} - {{ res }} + {{ cluster_hosts_target }} when: cluster_vars.type == "gcp" - name: get_cluster_hosts_target/esxifree | 
esxifree-specific modifications to cluster_hosts_target diff --git a/config/tasks/disks_auto_aws.yml b/config/tasks/disks_auto_aws_gcp.yml similarity index 50% rename from config/tasks/disks_auto_aws.yml rename to config/tasks/disks_auto_aws_gcp.yml index 479982d9..c30e4be9 100644 --- a/config/tasks/disks_auto_aws.yml +++ b/config/tasks/disks_auto_aws_gcp.yml @@ -1,44 +1,49 @@ --- -- name: disks_auto_aws | cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype - debug: msg={{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype] }} +- name: disks_auto_aws_gcp | cluster_hosts_target(inventory_hostname) + debug: msg={{ cluster_hosts_target | json_query(\"[?hostname == `\" + inventory_hostname + \"`] \") }} -- name: disks_auto_aws | Mount volumes as individual disks +- name: disks_auto_aws_gcp | Mount block devices as individual disks block: - - name: disks_auto_aws | Get the nvme information (pre-filesystem create) - ebsmap: + - name: disks_auto_aws_gcp | auto_vols + debug: msg={{ auto_vols }} + + - name: disks_auto_aws_gcp | Get the block device information (pre-filesystem create) + blockdevmap: + cloud_type: "{{cluster_vars.type}}" become: yes - register: r__ebsmap + register: r__blockdevmap - - name: disks_auto_aws | r__ebsmap (pre-filesystem create) - debug: msg={{r__ebsmap}} + - name: disks_auto_aws_gcp | r__blockdevmap (pre-filesystem create) + debug: msg={{r__blockdevmap}} - - name: disks_auto_aws | Create filesystem (partitionless) + - name: disks_auto_aws_gcp | Create filesystem (partitionless) become: yes filesystem: fstype: "{{ item.fstype }}" - dev: "{{ (r__ebsmap.device_map | selectattr('device_name_aws', '==', item.device_name) | list | last)['device_name_os'] }}" + dev: "{{ (r__blockdevmap.device_map | selectattr('device_name_cloud', '==', item.device_name) | list | last)['device_name_os'] }}" loop: "{{auto_vols}}" - - name: disks_auto_aws | Get the nvme information (post-filesystem create), to get the block IDs for mounting - ebsmap: + - name: disks_auto_aws_gcp | Get the block device information (post-filesystem create), to get the block IDs for mounting + blockdevmap: + cloud_type: "{{cluster_vars.type}}" become: yes - register: r__ebsmap + register: r__blockdevmap - - name: disks_auto_aws | r__ebsmap (post-filesystem create) - debug: msg={{r__ebsmap}} + - name: disks_auto_aws_gcp | r__blockdevmap (post-filesystem create) + debug: msg={{r__blockdevmap}} - - name: disks_auto_aws | Mount created filesytem(s) persistently + - name: disks_auto_aws_gcp | Mount created filesytem(s) persistently become: yes mount: path: "{{ item.mountpoint }}" - src: "UUID={{ (r__ebsmap.device_map | selectattr('device_name_aws', '==', item.device_name) | list | last)['UUID'] }}" + src: "UUID={{ (r__blockdevmap.device_map | selectattr('device_name_cloud', '==', item.device_name) | list | last)['UUID'] }}" fstype: "{{ item.fstype }}" state: mounted opts: _netdev loop: "{{auto_vols}}" - - name: disks_auto_aws | change ownership of mountpoint (if set) + - name: disks_auto_aws_gcp | change ownership of mountpoint (if set) become: yes file: path: "{{ item.mountpoint }}" @@ -49,70 +54,70 @@ loop: "{{auto_vols}}" - block: - - name: disks_auto_aws | Touch a file with the mountpoint and device name for testing that disk attachment is correct + - name: disks_auto_aws_gcp | Touch a file with the mountpoint and device name for testing that disk attachment is correct become: yes file: path: "{{item.mountpoint}}/__clusterversetest_{{ item.mountpoint | regex_replace('\/', 
'_') }}_{{ item.device_name | regex_replace('\/', '_') }}" state: touch loop: "{{auto_vols}}" - - name: disks_auto_aws | Find all __clusterversetest_ files in newly mounted disks + - name: disks_auto_aws_gcp | Find all __clusterversetest_ files in newly mounted disks find: paths: "{{item.mountpoint}}" patterns: "__clusterversetest_*" loop: "{{auto_vols}}" register: r__find_test - - name: disks_auto_aws | Display all __clusterversetest_ files in newly mounted disks. + - name: disks_auto_aws_gcp | Display all __clusterversetest_ files in newly mounted disks. debug: msg: "{{ r__find_test | json_query(\"results[].{device_name: item.device_name, mountpoint: item.mountpoint, files: files[].path}\") }}" when: test_touch_disks is defined and test_touch_disks|bool when: (auto_vols | map(attribute='mountpoint') | list | unique | count == auto_vols | map(attribute='mountpoint') | list | count) vars: - auto_vols: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].auto_volumes }}" + auto_vols: "{{ cluster_hosts_target | json_query(\"[?hostname == `\" + inventory_hostname + \"`].auto_volumes[]\") }}" -# The following block mounts all nvme attached volumes that have a single, common mountpoint, by creating a logical volume -- name: disks_auto_aws | Mount nvme volumes in a single mountpoint through LV/VG +# The following block mounts all attached volumes that have a single, common mountpoint, by creating a logical volume +- name: disks_auto_aws_gcp | Mount block devices in a single LVM mountpoint through LV/VG block: - - name: disks_auto_aws | Install logical volume management tooling. (yum - RedHat/CentOS) + - name: disks_auto_aws_gcp | Install logical volume management tooling. (yum - RedHat/CentOS) become: true yum: name: "lvm*" state: present when: ansible_os_family == 'RedHat' - - name: disks_auto_aws | Get the nvme information (pre-filesystem create) - ebsmap: + - name: disks_auto_aws_gcp | Get the device information (pre-filesystem create) + blockdevmap: become: yes - register: r__ebsmap + register: r__blockdevmap - - name: disks_auto_aws | r__ebsmap (pre-filesystem create) - debug: msg={{r__ebsmap}} + - name: disks_auto_aws_gcp | r__blockdevmap (pre-filesystem create) + debug: msg={{r__blockdevmap}} - - name: disks_auto_aws | Create a volume group from all nvme devices + - name: disks_auto_aws_gcp | Create a volume group from all block devices become: yes lvg: vg: "{{ hosttype_vars.lvmparams.vg_name }}" - pvs: "{{ r__ebsmap.device_map | json_query(\"[?device_name_aws && contains('\" + auto_vol_device_names + \"', device_name_aws)].device_name_os\") | join(',')}}" + pvs: "{{ r__blockdevmap.device_map | json_query(\"[?device_name_cloud && contains('\" + auto_vol_device_names + \"', device_name_cloud)].device_name_os\") | join(',')}}" vars: auto_vol_device_names: "{{hosttype_vars.auto_volumes | map(attribute='device_name') | sort | join(',')}}" - - name: disks_auto_aws | Create a logical volume from volume group + - name: disks_auto_aws_gcp | Create a logical volume from volume group become: yes lvol: vg: "{{ hosttype_vars.lvmparams.vg_name }}" lv: "{{ hosttype_vars.lvmparams.lv_name }}" size: "{{ hosttype_vars.lvmparams.lv_size }}" - - name: disks_auto_aws | Create filesystem(s) on attached nvme volume(s) + - name: disks_auto_aws_gcp | Create filesystem(s) on attached volume(s) become: yes filesystem: fstype: "{{ hosttype_vars.auto_volumes[0].fstype }}" dev: "/dev/{{ hosttype_vars.lvmparams.vg_name }}/{{ hosttype_vars.lvmparams.lv_name }}" force: no - - name: 
disks_auto_aws | Mount created filesytem(s) persistently + - name: disks_auto_aws_gcp | Mount created filesytem(s) persistently become: yes mount: path: "{{ hosttype_vars.auto_volumes[0].mountpoint }}" @@ -122,22 +127,22 @@ opts: _netdev - block: - - name: disks_auto_aws | Touch a file with the mountpoint and device name for testing that disk attachment is correct + - name: disks_auto_aws_gcp | Touch a file with the mountpoint and device name for testing that disk attachment is correct become: yes file: path: "{{ hosttype_vars.auto_volumes[0].mountpoint }}/__clusterversetest_{{ hosttype_vars.auto_volumes[0].mountpoint | regex_replace('\/', '_') }}" state: touch - - name: disks_auto_aws | Find all __clusterversetest_ files in newly mounted disks + - name: disks_auto_aws_gcp | Find all __clusterversetest_ files in newly mounted disks find: paths: "{{ hosttype_vars.auto_volumes[0].mountpoint }}" patterns: "__clusterversetest_*" register: r__find_test - - name: disks_auto_aws | Display all __clusterversetest_ files in newly mounted disks. + - name: disks_auto_aws_gcp | Display all __clusterversetest_ files in newly mounted disks. debug: msg: "{{ r__find_test | json_query(\"files[].path\") }}" when: test_touch_disks is defined and test_touch_disks|bool when: ('lvmparams' in hosttype_vars) and (hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | unique | count == 1) and (hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | count >= 2) and (hosttype_vars.auto_volumes | map(attribute='fstype') | list | unique | count == 1) vars: - hosttype_vars: "{{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype] }}" + hosttype_vars: "{{ cluster_hosts_target | json_query(\"[?hostname == `\" + inventory_hostname + \"`]\") }}" diff --git a/config/tasks/disks_auto_generic.yml b/config/tasks/disks_auto_generic.yml index 4c30ee1e..475edfa9 100644 --- a/config/tasks/disks_auto_generic.yml +++ b/config/tasks/disks_auto_generic.yml @@ -1,43 +1,34 @@ --- -#- debug: msg={{ cluster_vars[buildenv].hosttype_vars[hostvars[inventory_hostname].hosttype].auto_volumes }} -#- debug: msg={{ ansible_facts.devices }} - -- block: - - name: disks_auto_generic | Get unused block devices - set_fact: - block_devices: "{{ {'dev': item, 'size_b': (ansible_facts.devices[item].sectors|int) * (ansible_facts.devices[item].sectorsize|int)} }}" - with_items: "{{ ansible_facts.devices }}" - register: block_devices_list - when: item | regex_search("nvme|[xvsh]+d") and ansible_facts.devices[item].partitions == {} - - - name: disks_auto_generic | Create unused block devices list - set_fact: - lsblk_volumes: "{{ block_devices_list.results | map(attribute='ansible_facts.block_devices') | select('defined') | list }}" +- name: disks_auto_generic | Get the block device information (pre-filesystem create) + blockdevmap: + cloud_type: "lsblk" + become: yes + register: r__blockdevmap - - name: disks_auto_generic | lsblk_volumes - debug: msg={{ lsblk_volumes }} +- name: disks_auto_generic | r__blockdevmap + debug: msg={{ r__blockdevmap }} - name: disks_auto_generic | Create 'hostvols' fact that contains a list of available host devices (lsblk) mapped to the mountpoints defined in cluster_vars. Allow for multiple disks with same size. 
set_fact: hostvols: | {% set res = [] -%} - {% set tmp_blkvols = lsblk_volumes -%} + {% set tmp_blkvols = r__blockdevmap.device_map | selectattr('PTTYPE', '==', '') | selectattr('FSTYPE', '==', '') | selectattr('TYPE', '==', 'disk') | list -%} {% set inventory_hostname__no_suffix = inventory_hostname | regex_replace('-(?!.*-).*') -%} {%- for autovol in cluster_hosts_target | to_json | from_json | json_query('[?starts_with(hostname, \'' + inventory_hostname__no_suffix + '\')].auto_volumes[]') -%} {%- set blkvolloop = namespace(break=false) -%} {%- for blkvol in tmp_blkvols if not blkvolloop.break -%} - {%- if (autovol.volume_size*1073741824|int) == (blkvol.size_b|int) -%} - {%- set _ = res.extend([ {'device': '/dev/'+blkvol.dev, 'mountpoint': autovol.mountpoint, 'fstype': autovol.fstype, 'perms': autovol.perms | default({})}]) -%} - {%- set _ = tmp_blkvols.remove(blkvol) -%} + {%- if (autovol.volume_size*1073741824|int) == (blkvol['SIZE']|int) -%} + {%- set _ = res.extend([ {'device': blkvol['device_name_os'], 'mountpoint': autovol.mountpoint, 'fstype': autovol.fstype, 'perms': autovol.perms | default({})}]) -%} {%- set blkvolloop.break = true -%} + {%- set _ = tmp_blkvols.remove(blkvol) -%} {%- endif -%} {%- endfor -%} {%- endfor -%} {{ res }} -#- name: disks_auto_generic | hostvols -# debug: msg={{hostvols}} +- name: disks_auto_generic | hostvols + debug: msg={{hostvols}} # Create partition-less filesystems. - name: disks_auto_generic | Create filesystem(s) on attached volume(s) diff --git a/config/tasks/main.yml b/config/tasks/main.yml index 5a23db5e..b99cc683 100644 --- a/config/tasks/main.yml +++ b/config/tasks/main.yml @@ -52,13 +52,13 @@ mode: 0755 when: (static_journal is defined and static_journal|bool) -- name: Create partition table, format and attach volumes - AWS - include_tasks: disks_auto_aws.yml - when: cluster_vars.type == "aws" +- name: Create partition table, format and attach volumes - AWS or GCP + include_tasks: disks_auto_aws_gcp.yml + when: cluster_vars.type == "aws" or cluster_vars.type == "gcp" - name: Create partition table, format and attach volumes - generic include_tasks: disks_auto_generic.yml - when: cluster_vars.type != "aws" + when: cluster_vars.type != "aws" and cluster_vars.type != "gcp" - name: install prometheus node exporter daemon include_tasks: prometheus_node_exporter.yml @@ -68,13 +68,13 @@ include_tasks: filebeat.yml when: (filebeat_install is defined and filebeat_install|bool and (cluster_vars[buildenv].hosttype_vars[hosttype].skip_beat_install is undefined or (cluster_vars[buildenv].hosttype_vars[hosttype].skip_beat_install is defined and not cluster_vars[buildenv].hosttype_vars[hosttype].skip_beat_install|bool))) vars: - hosttype: "{{cluster_hosts_target | json_query('[? hostname == `' + inventory_hostname + '`].hosttype|[0]') }}" + hosttype: "{{cluster_hosts_target | json_query('[?hostname == `' + inventory_hostname + '`].hosttype|[0]') }}" - name: Install elastic metricbeat include_tasks: metricbeat.yml when: (metricbeat_install is defined and metricbeat_install|bool and (cluster_vars[buildenv].hosttype_vars[hosttype].skip_beat_install is undefined or (cluster_vars[buildenv].hosttype_vars[hosttype].skip_beat_install is defined and not cluster_vars[buildenv].hosttype_vars[hosttype].skip_beat_install|bool))) vars: - hosttype: "{{cluster_hosts_target | json_query('[? 
hostname == `' + inventory_hostname + '`].hosttype|[0]') }}" + hosttype: "{{cluster_hosts_target | json_query('[?hostname == `' + inventory_hostname + '`].hosttype|[0]') }}" - name: Install security cloud agent include_tasks: cloud_agents.yml diff --git a/create/tasks/gcp.yml b/create/tasks/gcp.yml index 75db396a..84824773 100644 --- a/create/tasks/gcp.yml +++ b/create/tasks/gcp.yml @@ -68,12 +68,20 @@ with_items: "{{ cluster_vars.firewall_rules }}" -- name: create/gcp | Generate GCE ssh public key from the private key provided on the command line - shell: ssh-keygen -y -f "{{ ansible_ssh_private_key_file }}" - register: r__gcp_ssh_pubkey - - name: create/gcp | Create GCP VMs asynchronously and wait for completion block: + - name: create/gcp | Detach volumes from previous instances (during the _scheme_rmvm_keepdisk_rollback redeploy, we only redeploy one host at a time, and it is already powered off) + gce_pd: + credentials_file: "{{gcp_credentials_file}}" + service_account_email: "{{gcp_credentials_json.client_email}}" + project_id: "{{cluster_vars[buildenv].vpc_project_id}}" + zone: "{{cluster_vars.region}}-{{item.az_name}}" + detach_only : yes + state: deleted + instance_name: "{{ item.auto_volume.src.hostname }}" + name: "{{item.auto_volume.src.source_url | basename}}" + loop: "{{ cluster_hosts_target_denormalised_by_volume | selectattr('auto_volume.src', 'defined') | list }}" + - name: create/gcp | Create GCP VMs asynchronously gcp_compute_instance: auth_kind: "serviceaccount" @@ -82,11 +90,10 @@ zone: "{{cluster_vars.region}}-{{item.az_name}}" name: "{{item.hostname}}" machine_type: "{{item.flavor}}" - disks: "{{_host_disks}}" + disks: "{{ [_bootdisk] + (_autodisks | default([])) }}" metadata: startup-script: "{%- if cluster_vars.ssh_guard_whitelist is defined and cluster_vars.ssh_guard_whitelist | length > 0 -%}#! 
/bin/bash\n\n#Whitelist my inbound IPs\n[ -f /etc/sshguard/whitelist ] && echo \"{{cluster_vars.ssh_guard_whitelist | join ('\n')}}\" >>/etc/sshguard/whitelist && /bin/systemctl restart sshguard{%- endif -%}" - ssh-keys: "{{ cliargs.remote_user }}:{{ r__gcp_ssh_pubkey.stdout }}" - # ssh-keys: "{{ cliargs.remote_user }}:{{ r__gcp_ssh_pubkey.stdout }} {{ cliargs.remote_user }}" + ssh-keys: "{{ cliargs.remote_user }}:{{ lookup('pipe', 'ssh-keygen -y -f ' + ansible_ssh_private_key_file) }} {{ cliargs.remote_user }}" labels: "{{ _labels | combine(cluster_vars.custom_tagslabels | default({})) }}" network_interfaces: - network: "{{ r__gcp_compute_network_info['resources'][0] | default({}) }}" @@ -98,11 +105,10 @@ state: present deletion_protection: "{{cluster_vars[buildenv].deletion_protection}}" vars: - __autodisksnames: "{%- if cluster_vars[buildenv].hosttype_vars[item.hosttype].auto_volumes | length -%}[{%- for vol in cluster_vars[buildenv].hosttype_vars[item.hosttype].auto_volumes -%}{%- set mountname = vol.mountpoint | regex_replace('.*\\/(.*)', '\\\\1') -%}{{vol|combine({'mountname': mountname})}}{% if not loop.last %},{% endif %}{%- endfor -%}]{%- else -%}[]{%- endif-%}" - _autodisks: "{{__autodisksnames | to_json | from_json | json_query(\" [].{auto_delete: auto_delete, interface: interface, device_name: join('',[`\"+item.hostname+\"--`,mountname]), initialize_params: {disk_name: join('',[`\"+item.hostname+\"--`,mountname]), disk_size_gb: volume_size}} \") }}" - _bootdisk: {auto_delete: true, boot: true, device_name: "{{ item.hostname }}--boot", initialize_params: {source_image: "{{cluster_vars.image}}", disk_name: "{{ item.hostname }}--boot", disk_size_gb: "{{item.rootvol_size}}"}} - _host_disks: "{{[_bootdisk] + _autodisks}}" + _bootdisk: {auto_delete: true, boot: true, device_name: "{{ item.hostname }}--boot", initialize_params: {source_image: "{{cluster_vars.image}}", disk_name: "{{ item.hostname }}--boot", disk_size_gb: "{{cluster_vars[buildenv].hosttype_vars[item.hosttype].rootvol_size}}"}} + _autodisks: "{{item.auto_volumes | json_query(\"[].{auto_delete: auto_delete, interface: interface, device_name: device_name, initialize_params: initialize_params, source: {selfLink: src.source_url}}\") }}" _labels: + name: "{{item.hostname}}" inv_node_version: "{{cluster_vars[buildenv].hosttype_vars[item.hosttype].version | default(omit)}}" inv_node_type: "{{item.hosttype}}" hosttype: "{{item.hosttype}}" @@ -112,7 +118,7 @@ maintenance_mode: "true" release: "{{ release_version }}" lifecycle_state: "current" - register: gcp_compute_instance + register: r__gcp_compute_instance with_items: "{{cluster_hosts_target}}" async: 7200 poll: 0 @@ -120,14 +126,115 @@ - name: create/gcp | Wait for GCE instance creation to complete async_status: jid: "{{ item.ansible_job_id }}" - register: gcp_jobs - until: gcp_jobs.finished + register: r__async_status__gcp_compute_instance + until: r__async_status__gcp_compute_instance.finished delay: 3 retries: 300 - with_items: "{{gcp_compute_instance.results}}" + with_items: "{{r__gcp_compute_instance.results}}" + + - name: create/gcp | r__async_status__gcp_compute_instance.results + debug: msg={{r__async_status__gcp_compute_instance.results}} - name: create/gcp | Set a fact containing the newly-created hosts set_fact: cluster_hosts_created: "{{ gcp_jobs.results | json_query(\"[?item.changed==`true`].item.item\") }}" + - name: create/gcp | Label the volumes + gce_labels: + project_id: "{{cluster_vars[buildenv].vpc_project_id}}" + credentials_file: 
"{{gcp_credentials_file}}" + service_account_email: "{{gcp_credentials_json.client_email}}" + resource_url: "{{item.resource_url}}" + labels: "{{ _labels | combine(cluster_vars.custom_tagslabels | default({})) }}" + with_items: "{{_ec2_vols_denormalised_by_device}}" + vars: + _ec2_vols_denormalised_by_device: |- + {% set res = [] -%} + {%- for host_instance in r__async_status__gcp_compute_instance.results -%} + {%- for disk in host_instance.disks -%} + {% set _ = res.append({'hostname': host_instance.name , 'hosttype': host_instance.labels.hosttype, 'device_name': disk.deviceName, 'disk_name': disk.source | basename, 'resource_url': disk.source, 'regionzone': host_instance.invocation.module_args.zone}) -%} + {%- endfor %} + {%- endfor %} + {{ res }} + _labels: + name: "{{ item.device_name }}" + inv_node_version: "{{cluster_vars[buildenv].hosttype_vars[item.hosttype].version | default(omit)}}" + inv_node_type: "{{item.hosttype}}" + owner: "{{ lookup('env','USER') | lower }}" + release: "{{ release_version }}" + +# - name: create/gcp | Attach (or create) volumes where 'src' is present (e.g. inserted as part of _scheme_rmvm_keepdisk_rollback scheme) +# gce_pd: +# credentials_file: "{{gcp_credentials_file}}" +# service_account_email: "{{gcp_credentials_json.client_email}}" +# project_id: "{{cluster_vars[buildenv].vpc_project_id}}" +# zone: "{{cluster_vars.region}}-{{item.az_name}}" +# delete_on_termination: yes +# disk_type : "{{item.auto_volume.volume_type | default(omit)}}" +# instance_name: "{{ item.hostname }}" +# mode: "READ_WRITE" +# name: "{{item.auto_volume.device_name}}" +# size_gb : "{%- if 'src' not in item.auto_volume -%}{{item.auto_volume.volume_size}}{%- endif -%}" +# loop: "{{ cluster_hosts_target_denormalised_by_volume | selectattr('auto_volume.src', 'defined') | list }}" +# async: 7200 +# poll: 0 +# register: r__gce_pd +# +# - name: create/aws | Wait for volume creation/ attachment to complete +# async_status: { jid: "{{ item.ansible_job_id }}" } +# register: r__async_status__gce_pd +# until: r__async_status__gce_pd.finished +# delay: 3 +# retries: 300 +# with_items: "{{r__gce_pd.results}}" +# +# - name: create/gcp | Get existing GCE instance info (per AZ) +# gcp_compute_instance_info: +# zone: "{{cluster_vars.region}}-{{item}}" +# filters: +# - "labels.cluster_name = {{cluster_name}}" +# - "labels.lifecycle_state = current" +# project: "{{cluster_vars[buildenv].vpc_project_id}}" +# auth_kind: "serviceaccount" +# service_account_file: "{{gcp_credentials_file}}" +# scopes: ["https://www.googleapis.com/auth/compute.readonly"] +# with_items: "{{ cluster_vars[buildenv].hosttype_vars | json_query(\"*[vms_by_az][][keys(@)][][]\") | unique }}" +# register: r__gcp_compute_instance_info +# +# - name: create/gcp | r__gcp_compute_instance_info.results +# debug: msg={{r__gcp_compute_instance_info.results}} +# +# - name: create/gcp | Label the volumes +# gce_labels: +# project_id: "{{cluster_vars[buildenv].vpc_project_id}}" +# credentials_file: "{{gcp_credentials_file}}" +# service_account_email: "{{gcp_credentials_json.client_email}}" +# resource_url: "{{item.resource_url}}" +# labels: "{{ _labels | combine(cluster_vars.custom_tagslabels | default({})) }}" +# with_items: "{{_ec2_vols_denormalised_by_device}}" +# vars: +# _ec2_vols_denormalised_by_device: |- +# {% set res = [] -%} +# {%- for zone_result in r__gcp_compute_instance_info.results -%} +# {%- for host_instance in zone_result.resources -%} +# {%- for disk in host_instance.disks -%} +# {% set _ = res.append({'hostname': 
host_instance.name , 'hosttype': host_instance.labels.hosttype, 'device_name': disk.deviceName, 'disk_name': disk.source | basename, 'resource_url': disk.source, 'regionzone': zone_result.invocation.module_args.zone}) -%} +# {%- endfor %} +# {%- endfor %} +# {%- endfor %} +# {{ res }} +# _labels: +# name: "{{ item.device_name }}" +# inv_node_version: "{{cluster_vars[buildenv].hosttype_vars[item.hosttype].version | default(omit)}}" +# inv_node_type: "{{item.hosttype}}" +# owner: "{{ lookup('env','USER') | lower }}" +# release: "{{ release_version }}" +# - name: create/gcp | Label the volumes ## DOES NOT ADD / MODITY LABELS ON EXISTING DISKS (cannot use for attaching and relabelling existing disks) +# gcp_compute_disk: +# auth_kind: "serviceaccount" +# service_account_file: "{{gcp_credentials_file}}" +# project: "{{cluster_vars[buildenv].vpc_project_id}}" +# zone: "{{item.regionzone}}" +# name: "{{item.disk_name}}" +# labels: "{{ _labels | combine(cluster_vars.custom_tagslabels | default({})) }}" diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml index 07db0737..b916e101 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml @@ -18,10 +18,10 @@ cluster_hosts_target: | {%- for cht_host in cluster_hosts_target -%} {%- for cht_autovol in cht_host.auto_volumes -%} - {%- for chs_host_info_result in r__ec2_instance_info.instances | selectattr('tags.lifecycle_state', '!=', 'current') -%} - {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == chs_host_info_result.tags.Name | regex_replace('-(?!.*-).*') -%} - {%- for chs_host_diskinfo in chs_host_info_result.block_device_mappings | selectattr('device_name', '==', cht_autovol.device_name) -%} - {%- set _ = cht_autovol.update({'src': {'instance_id': chs_host_info_result.instance_id, 'device_name': chs_host_diskinfo.device_name, 'volume_id': chs_host_diskinfo.ebs.volume_id }}) -%} + {%- for ec2_instance_info_result in r__ec2_instance_info.instances | selectattr('tags.lifecycle_state', '!=', 'current') -%} + {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == ec2_instance_info_result.tags.Name | regex_replace('-(?!.*-).*') -%} + {%- for chs_host_diskinfo in ec2_instance_info_result.block_device_mappings | selectattr('device_name', '==', cht_autovol.device_name) -%} + {%- set _ = cht_autovol.update({'src': {'instance_id': ec2_instance_info_result.instance_id, 'device_name': chs_host_diskinfo.device_name, 'volume_id': chs_host_diskinfo.ebs.volume_id }}) -%} {%- endfor -%} {%- endif -%} {%- endfor -%} diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__gcp.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__gcp.yml new file mode 100644 index 00000000..5b278a9f --- /dev/null +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__gcp.yml @@ -0,0 +1,39 @@ +--- + +- name: _get_diskinfo_gcp | Get existing GCE instance info (per AZ) + gcp_compute_instance_info: + zone: "{{cluster_vars.region}}-{{item}}" + filters: + - "labels.cluster_name = {{cluster_name}}" + project: "{{cluster_vars[buildenv].vpc_project_id}}" + auth_kind: "serviceaccount" + service_account_file: "{{gcp_credentials_file}}" + scopes: 
["https://www.googleapis.com/auth/compute.readonly"] + with_items: "{{ cluster_vars[buildenv].hosttype_vars | json_query(\"*[vms_by_az][][keys(@)][][]\") | unique }}" + register: r__gcp_compute_instance_info + +- name: _get_diskinfo_gcp | r__gcp_compute_instance_info.results + debug: msg={{r__gcp_compute_instance_info.results}} + +- name: _get_diskinfo_gcp | augment/update cluster_hosts_target auto_volumes with source disk info + set_fact: + cluster_hosts_target: | + {%- for cht_host in cluster_hosts_target -%} + {%- for cht_autovol in cht_host.auto_volumes -%} + {%- for gcp_compute_instance_result in r__gcp_compute_instance_info.results | json_query('[].resources[?labels.lifecycle_state != "current"][]') -%} + {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == gcp_compute_instance_result.name | regex_replace('-(?!.*-).*') -%} + {%- for gcp_compute_instance_diskinfo in gcp_compute_instance_result.disks -%} + {%- if cht_autovol.initialize_params.disk_name | regex_replace('(.*)-.*(--.*)', '\\1\\2') == gcp_compute_instance_diskinfo.source | basename | regex_replace('(.*)-.*(--.*)', '\\1\\2') -%} + {%- set _ = cht_autovol.update({'device_name': gcp_compute_instance_diskinfo.source | basename}) -%} + {%- set _ = cht_autovol.update({'src': {'hostname': gcp_compute_instance_result.name, 'device_name': cht_autovol.device_name, 'source_url': gcp_compute_instance_diskinfo.source }}) -%} + {%- set _ = cht_autovol.update({'initialize_params': {'disk_name': cht_autovol.device_name, 'disk_size_gb': gcp_compute_instance_diskinfo.diskSizeGb}}) -%} + {%- endif -%} + {%- endfor -%} + {%- endif -%} + {%- endfor -%} + {%- endfor -%} + {%- endfor -%} + {{cluster_hosts_target}} + +- name: _get_diskinfo_gcp | cluster_hosts_target + debug: msg={{cluster_hosts_target}} diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml index dbc99a0a..e7091c8d 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml @@ -3,7 +3,7 @@ - name: Preflight check block: - block: - - assert: { that: "cluster_vars.type != 'gcp'", fail_msg: "This scheme is not supported on GCP." } +# - assert: { that: "cluster_vars.type != 'gcp'", fail_msg: "This scheme is not supported on GCP." } - name: Preflight check | get ec2_instance_info for current disk information ec2_instance_info: From f5b8913cdcb2e2eef898abe0f9a544487bb3aac2 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Sun, 25 Oct 2020 14:21:32 +0000 Subject: [PATCH 41/58] Fix for _scheme_addnewvm_rmdisk_rollback when using myhosttypes --- .../_scheme_addnewvm_rmdisk_rollback/tasks/main.yml | 8 ++++---- .../tasks/preflight.yml | 4 +--- redeploy/_scheme_rmvm_rmdisk_only/tasks/main.yml | 12 +++++++++--- redeploy/tasks/main.yml | 4 ++-- 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml index 2a71a918..d3c7b9d3 100644 --- a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml +++ b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml @@ -5,7 +5,7 @@ - assert: { that: "non_current_hosts | length == 0", msg: "ERROR - All VMs must be in the 'current' lifecycle_state. 
Those not [{{non_current_hosts | join(',')}}]" } vars: non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" - when: canary=="start" or canary=="none" + when: (canary=="start" or canary=="none") and (myhosttypes is not defined or myhosttypes=='') - name: Redeploy by hosttype; rollback on fail @@ -39,14 +39,14 @@ name: "{{predeleterole}}" when: predeleterole is defined and predeleterole != "" vars: - hosts_to_remove: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring' && (contains('RUNNING,running', instance_state))]\") }}" + hosts_to_remove: "{{ hosts_to_stop | json_query(\"[?contains('RUNNING,running', instance_state)]\") }}" - name: Power off any other retiring VM(s) that might exist if we're redeploying to a smaller topology. include_role: name: clusterverse/redeploy/__common tasks_from: poweroff_vms.yml - vars: - hosts_to_stop: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}" + vars: + hosts_to_stop: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring' && ('\"+ myhosttypes|default('') + \"' == '' || contains('\"+ myhosttypes|default('') + \"', tagslabels.hosttype))]\") }}" when: (canary=="finish" or canary=="none") - name: re-acquire cluster_hosts_target and cluster_hosts_state (for tidy - can't be in the tidy block because the block depends on this info being correct) diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml index e7091c8d..251e7cf9 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml @@ -3,8 +3,6 @@ - name: Preflight check block: - block: -# - assert: { that: "cluster_vars.type != 'gcp'", fail_msg: "This scheme is not supported on GCP." } - - name: Preflight check | get ec2_instance_info for current disk information ec2_instance_info: filters: { "instance-state-name": [ "running", "stopped" ], "tag:cluster_name": "{{cluster_name}}", "tag:lifecycle_state": "current" } @@ -34,6 +32,6 @@ - assert: { that: "non_current_hosts | length == 0", fail_msg: "ERROR - All VMs must be in the 'current' lifecycle_state. Those not [{{non_current_hosts | join(',')}}]" } vars: { non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" } - when: (canary=="start" or canary=="none") + when: (canary=="start" or canary=="none") and (myhosttypes is not defined or myhosttypes=='') - assert: { that: "(cluster_hosts_state | selectattr('tagslabels.lifecycle_state', '==', 'current') | list | length) == (cluster_hosts_target | length)", fail_msg: "Cannot use this scheme to redeploy to a different-sized cluster" } diff --git a/redeploy/_scheme_rmvm_rmdisk_only/tasks/main.yml b/redeploy/_scheme_rmvm_rmdisk_only/tasks/main.yml index c8f38c96..75e78f71 100644 --- a/redeploy/_scheme_rmvm_rmdisk_only/tasks/main.yml +++ b/redeploy/_scheme_rmvm_rmdisk_only/tasks/main.yml @@ -2,13 +2,19 @@ - name: Preflight check block: - - assert: - that: "{{chs_hosts | difference(chf_hosts) | length==0}}" - msg: "Cannot use this scheme to redeploy to smaller cluster; [{{ chs_hosts | join(',') }}] > [{{ chf_hosts | join(',') }}]" + - assert: { that: "non_current_hosts | length == 0", msg: "ERROR - All VMs must be in the 'current' lifecycle_state. 
Those not [{{non_current_hosts | join(',')}}]" } + vars: + non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" + when: canary=="start" or canary=="none" + + - assert: { that: "{{chs_hosts | difference(chf_hosts) | length==0}}", fail_msg: "Cannot use this scheme to redeploy to smaller cluster; [{{ chs_hosts | join(',') }}] > [{{ chf_hosts | join(',') }}]" } vars: chf_hosts: "{{ cluster_hosts_target | json_query(\"[].hostname\") | map('regex_replace', '-(?!.*-).*') | list }}" chs_hosts: "{{ cluster_hosts_state | json_query(\"[].name\") | map('regex_replace', '-(?!.*-).*') | list }}" + - assert: { that: "canary != 'tidy'", fail_msg: "'tidy' is not valid for this redeploy scheme" } + + - name: Run redeploy per hosttype. Delete one at a time, then reprovision. include_tasks: by_hosttype.yml with_items: "{{ myhosttypes_array }}" diff --git a/redeploy/tasks/main.yml b/redeploy/tasks/main.yml index e4788e94..56153831 100644 --- a/redeploy/tasks/main.yml +++ b/redeploy/tasks/main.yml @@ -3,14 +3,14 @@ - name: Preflight check - Redeploy block: - assert: { that: "clean is not defined", msg: "Must not set the 'clean' variable for a redeploy" } - - assert: { that: "canary is defined and (canary is defined and canary in ['start', 'finish', 'none', 'tidy', 'revert'])", msg: "Canary must be 'start', 'finish', 'none', 'tidy' or 'revert'" } + - assert: { that: "canary is defined and (canary is defined and canary in ['start', 'finish', 'none', 'tidy'])", msg: "Canary must be 'start', 'finish', 'none' or 'tidy'" } - assert: { that: "redeploy_scheme is defined and redeploy_scheme in redeploy_schemes_supported" } - assert: { that: "cluster_hosts_state | length", msg: "Redeploy only possible with an existing cluster." } - name: "Run the {{redeploy_scheme}} redploy scheme" include_role: name: "{{role_path}}/{{redeploy_scheme}}" - when: redeploy_scheme is defined + when: redeploy_scheme is defined - name: Get the final dynamic inventory (to write out current) From b6e57801402579a54753a65573ed5551b327a358 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Sun, 25 Oct 2020 22:09:05 +0000 Subject: [PATCH 42/58] Rename _scheme_rmvm_keepdisk_only__copy_or_move variable to _scheme_rmvm_keepdisk_rollback__copy_or_move --- .../_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml | 4 ++-- redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml index ee89196e..d8e791e2 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml @@ -15,7 +15,7 @@ # debug: msg={{r__vmware_guest_disk_info}} - assert: { that: "r__vmware_guest_disk_info | json_query(\"results[].guest_disk_info.*[] | [?backing_datastore!='\" + cluster_vars.datastore + \"']\") | length == 0", msg: "Move is only possible if disks are on the same datastore." 
} - when: _scheme_rmvm_keepdisk_only__copy_or_move == "move" + when: _scheme_rmvm_keepdisk_rollback__copy_or_move == "move" - name: _get_diskinfo_esxifree | augment cluster_hosts_target auto_volumes with source disk info set_fact: @@ -25,7 +25,7 @@ {%- for chs_host_info_result in r__vmware_guest_disk_info.results -%} {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == chs_host_info_result.item.name | regex_replace('-(?!.*-).*') -%} {%- for chs_host_diskinfo in chs_host_info_result.guest_disk_info | to_json | from_json | json_query('*| [?unit_number!=`0` && backing_type==\'FlatVer2\' && contains(backing_filename, \'--' + cht_autovol.volname + '.vmdk\')]') -%} - {%- set _ = cht_autovol.update({'volume_size': (chs_host_diskinfo.capacity_in_bytes/1073741824)|int, 'src': {'backing_filename': chs_host_diskinfo.backing_filename, 'copy_or_move': _scheme_rmvm_keepdisk_only__copy_or_move }}) -%} + {%- set _ = cht_autovol.update({'volume_size': (chs_host_diskinfo.capacity_in_bytes/1073741824)|int, 'src': {'backing_filename': chs_host_diskinfo.backing_filename, 'copy_or_move': _scheme_rmvm_keepdisk_rollback__copy_or_move }}) -%} {%- endfor -%} {%- endif -%} {%- endfor -%} diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml index 251e7cf9..4a00f6fd 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/preflight.yml @@ -30,6 +30,9 @@ {{ testloop.is_not_subset }} when: cluster_vars.type == "aws" + - assert: { that: "_scheme_rmvm_keepdisk_rollback__copy_or_move is defined and _scheme_rmvm_keepdisk_rollback__copy_or_move in ['copy', 'move']", fail_msg: "ERROR - _scheme_rmvm_keepdisk_rollback__copy_or_move must be defined and set to either 'copy' or 'move'" } + when: cluster_vars.type == "esxifree" + - assert: { that: "non_current_hosts | length == 0", fail_msg: "ERROR - All VMs must be in the 'current' lifecycle_state. 
Those not [{{non_current_hosts | join(',')}}]" } vars: { non_current_hosts: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state!='current'].name\") }}" } when: (canary=="start" or canary=="none") and (myhosttypes is not defined or myhosttypes=='') From 22cbb302cf532f916e9296fb66fa7cab2805aebe Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Sun, 25 Oct 2020 22:48:49 +0000 Subject: [PATCH 43/58] Rebase fixes --- EXAMPLE/cluster.yml | 24 +++------- EXAMPLE/redeploy.yml | 12 +---- .../tasks/get_cluster_hosts_state.yml | 45 +++++++++++++++++++ dynamic_inventory/tasks/main.yml | 4 +- 4 files changed, 55 insertions(+), 30 deletions(-) diff --git a/EXAMPLE/cluster.yml b/EXAMPLE/cluster.yml index 25102511..5df25bd9 100644 --- a/EXAMPLE/cluster.yml +++ b/EXAMPLE/cluster.yml @@ -1,40 +1,30 @@ --- -- name: Download required roles - hosts: localhost:all - connection: local - tasks: - - name: "ansible-galaxy install" - local_action: command ansible-galaxy install -r requirements.yml - delegate_to: localhost - run_once: true - tags: [always] - - name: Deploy the cluster hosts: localhost connection: local gather_facts: no tasks: - - { name: "Get dependent roles via ansible-galaxy", local_action: "command ansible-galaxy install -r requirements.yml", tags: ["always"] } + - { name: "Get dependent roles via ansible-galaxy", local_action: "command ansible-galaxy install -fr requirements.yml", tags: ["always"] } - - { include_role: { name: "clusterverse/clean", apply: {tags: &roletag_clean ["clusterverse_clean"]} }, tags: *roletag_clean, when: "clean is defined" } - - { include_role: { name: "clusterverse/create", apply: {tags: &roletag_create ["clusterverse_create"]} }, tags: *roletag_create } - - { include_role: { name: "clusterverse/dynamic_inventory", apply: {tags: &roletag_dynamic_inventory ["clusterverse_dynamic_inventory"]} }, tags: *roletag_dynamic_inventory } + - { include_role: { name: "clusterverse/clean", apply: { tags: ["clusterverse_clean"]} }, tags: ["clusterverse_clean"], when: "clean is defined" } + - { include_role: { name: "clusterverse/create", apply: { tags: ["clusterverse_create"]} }, tags: ["clusterverse_create"] } + - { include_role: { name: "clusterverse/dynamic_inventory", apply: { tags: ["clusterverse_dynamic_inventory"]} }, tags: ["clusterverse_dynamic_inventory"] } - name: Configure the cluster hosts: all tasks: - - { include_role: { name: "clusterverse/config", apply: {tags: &roletag_config ["clusterverse_config"]} }, tags: *roletag_config } + - { include_role: { name: "clusterverse/config", apply: { tags: ["clusterverse_config"]} }, tags: ["clusterverse_config"] } ## Application roles - name: Application roles hosts: all tasks: - - { include_role: { name: "testrole", apply: {tags: &roletag_testrole ["testrole"]} }, tags: *roletag_testrole } + - { include_role: { name: "testrole", apply: { tags: ["testrole"]} }, tags: ["testrole"] } ## - name: Perform cluster readiness operations hosts: localhost connection: local tasks: - - { include_role: { name: "clusterverse/readiness", apply: {tags: &roletag_readiness ["clusterverse_readiness"]} }, tags: *roletag_readiness } + - { include_role: { name: "clusterverse/readiness", apply: { tags: ["clusterverse_readiness"]} }, tags: ["clusterverse_readiness"] } diff --git a/EXAMPLE/redeploy.yml b/EXAMPLE/redeploy.yml index 65ffac69..4fc6fac6 100644 --- a/EXAMPLE/redeploy.yml +++ b/EXAMPLE/redeploy.yml @@ -1,21 +1,11 @@ --- -- name: Download required roles - hosts: localhost:all - connection: local - tasks: - - name: 
"ansible-galaxy install" - local_action: command ansible-galaxy install -r requirements.yml - delegate_to: localhost - run_once: true - tags: [always] - - name: Redeploy hosts: localhost connection: local tasks: - name: "Get dependent roles via ansible-galaxy" - local_action: "command ansible-galaxy install -r requirements.yml" + local_action: "command ansible-galaxy install -fr requirements.yml" tags: ["always"] - name: Run redeploy diff --git a/cluster_hosts/tasks/get_cluster_hosts_state.yml b/cluster_hosts/tasks/get_cluster_hosts_state.yml index ff0fca7e..7d50dafa 100644 --- a/cluster_hosts/tasks/get_cluster_hosts_state.yml +++ b/cluster_hosts/tasks/get_cluster_hosts_state.yml @@ -47,6 +47,51 @@ _cluster_hosts_state__urlregion: "{{r__gcp_compute_instance_info.results | json_query(\"[?resources[?labels]].resources[].{name: name, regionzone: zone, tagslabels: labels, instance_id: id, instance_state: status}\") }}" when: cluster_vars.type == "gcp" +- name: get_cluster_hosts_state_esxifree | Get VMware cluster_hosts_state + block: + - name: get_cluster_hosts_state_esxifree | Get existing VMware instance info + vmware_vm_info: + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + hostname: "{{ cluster_vars.esxi_ip }}" + validate_certs: no + register: r__vmware_vm_info + delegate_to: localhost + run_once: true + + - name: get_cluster_hosts_state_esxifree | Get existing VMware instance facts + vmware_guest_info: + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + hostname: "{{ cluster_vars.esxi_ip }}" + validate_certs: no + datacenter: None + uuid: "{{item.uuid}}" + with_items: "{{ r__vmware_vm_info.virtual_machines | to_json | from_json | json_query(\"[?starts_with(guest_name, '\"+cluster_name+\"')]\") }}" + register: r__vmware_guest_info + delegate_to: localhost + run_once: true + + ## esxifree hosts must use the esxi 'annotations' field as json. They are stored as unconventional text in the vmx file, so must + ## be converted into inline-json within the facts. If the annotation field is not convertible to json, then we don't consider this VM part of the cluster. 
+ - name: get_cluster_hosts_state_esxifree | update r__vmware_guest_info result with json-parsed annotations + set_fact: + r__vmware_guest_info: | + {% set res = {'results': []} -%} + {%- for result in r__vmware_guest_info.results -%} + {%- set loadloose_res = result.instance.annotation | json_loads_loose -%} + {%- if loadloose_res | type_debug == 'dict' or loadloose_res | type_debug == 'list' -%} + {%- set _ = result.instance.update({'annotation': loadloose_res}) -%} + {%- set _ = res.results.append(result) -%} + {%- endif -%} + {%- endfor -%} + {{ res }} + + - name: get_cluster_hosts_state_esxifree | Set cluster_hosts_state + set_fact: + cluster_hosts_state: "{{ r__vmware_guest_info.results | json_query(\"[].{name: instance.hw_name, regionzone: None, tagslabels: instance.annotation, instance_id: instance.moid, instance_state: instance.hw_power_status}\") }}" + + when: cluster_vars.type == "esxifree" - name: get_cluster_hosts_state | cluster_hosts_state diff --git a/dynamic_inventory/tasks/main.yml b/dynamic_inventory/tasks/main.yml index 8647b8e2..0c1925a0 100644 --- a/dynamic_inventory/tasks/main.yml +++ b/dynamic_inventory/tasks/main.yml @@ -24,7 +24,7 @@ - name: dynamic_inventory | Add hosts to dynamic inventory add_host: name: "{{ item.hostname }}" - groups: "{{ item.hosttype }},{{ cluster_name }},{{ clusterid }}{%- if 'regionzone' in item -%},{{ item.regionzone }}{%- endif -%}" + groups: "{{ item.hosttype }},{{ cluster_name }},{{ clusterid }}{% if 'regionzone' in item %},{{ item.regionzone }}{% endif %}" ansible_host: "{{ item.inventory_ip }}" hosttype: "{{ item.hosttype }}" regionzone: "{{ item.regionzone | default(omit) }}" @@ -42,7 +42,7 @@ {% if groupname not in ["all", "ungrouped"] -%} [{{ groupname }}] {% for hostname in groups[groupname] %} - {{ hostname }} ansible_host={{hostvars[hostname].ansible_host}} hosttype={{ hostvars[hostname].hosttype }} {%- if 'regionzone' in hostvars[hostname] -%}regionzone={{ hostvars[hostname].regionzone }}{%- endif -%} + {{ hostname }} ansible_host={{hostvars[hostname].ansible_host}} hosttype={{ hostvars[hostname].hosttype }} {% if 'regionzone' in hostvars[hostname] %}regionzone={{ hostvars[hostname].regionzone }}{% endif %}{{''}} {% endfor %} {% endif %} From a703e809b15a8b913afa70672b8f222839bd0166 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Mon, 26 Oct 2020 07:16:03 +0000 Subject: [PATCH 44/58] Fix for generic disk mapping during redeploy --- config/tasks/disks_auto_generic.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/tasks/disks_auto_generic.yml b/config/tasks/disks_auto_generic.yml index 475edfa9..33ae8746 100644 --- a/config/tasks/disks_auto_generic.yml +++ b/config/tasks/disks_auto_generic.yml @@ -13,7 +13,7 @@ set_fact: hostvols: | {% set res = [] -%} - {% set tmp_blkvols = r__blockdevmap.device_map | selectattr('PTTYPE', '==', '') | selectattr('FSTYPE', '==', '') | selectattr('TYPE', '==', 'disk') | list -%} + {% set tmp_blkvols = r__blockdevmap.device_map | selectattr('PTTYPE', '==', '') | selectattr('MOUNTPOINT', '==', '') | selectattr('TYPE', '==', 'disk') | list -%} {% set inventory_hostname__no_suffix = inventory_hostname | regex_replace('-(?!.*-).*') -%} {%- for autovol in cluster_hosts_target | to_json | from_json | json_query('[?starts_with(hostname, \'' + inventory_hostname__no_suffix + '\')].auto_volumes[]') -%} {%- set blkvolloop = namespace(break=false) -%} From d954dc7bb7cb91731bc67b613790b681fef3d63c Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Tue, 27 Oct 2020 06:49:23 +0000 
Subject: [PATCH 45/58] Update blockdevmap.py to be python2 compatible and have better compatibility with older version of lsblk (often on even recent RHEL) Do not attempt to remap disks that are already mapped (only a problem if the mapping has changed between runs of cluster.yml) --- .../group_vars/test_gcp_euw1/cluster_vars.yml | 2 +- _dependencies/library/blockdevmap.py | 21 ++++++++++--------- config/tasks/disks_auto_aws_gcp.yml | 10 +++++++-- config/tasks/disks_auto_generic.yml | 2 +- 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml b/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml index e4390373..c7bd7e5a 100644 --- a/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml +++ b/EXAMPLE/group_vars/test_gcp_euw1/cluster_vars.yml @@ -4,7 +4,7 @@ gcp_credentials_file: "{{ lookup('env','GCP_CREDENTIALS') | default('/dev/null', true) }}" gcp_credentials_json: "{{ lookup('file', gcp_credentials_file) | default({'project_id': 'GCP_CREDENTIALS__NOT_SET','client_email': 'GCP_CREDENTIALS__NOT_SET'}, true) }}" -redeploy_schemes_supported: ['_scheme_addallnew_rmdisk_rollback', '_scheme_addnewvm_rmdisk_rollback', '_scheme_rmvm_rmdisk_only'] +redeploy_schemes_supported: ['_scheme_addallnew_rmdisk_rollback', '_scheme_addnewvm_rmdisk_rollback', '_scheme_rmvm_rmdisk_only', '_scheme_rmvm_keepdisk_rollback'] #redeploy_scheme: _scheme_addallnew_rmdisk_rollback #redeploy_scheme: _scheme_addnewvm_rmdisk_rollback diff --git a/_dependencies/library/blockdevmap.py b/_dependencies/library/blockdevmap.py index 3581f437..f227e218 100644 --- a/_dependencies/library/blockdevmap.py +++ b/_dependencies/library/blockdevmap.py @@ -219,7 +219,7 @@ def __init__(self, module, **kwds): def get_lsblk(self): # Get all existing block volumes by key=value, then parse this into a dictionary (which excludes non disk and partition block types, e.g. ram, loop). - lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,MOUNTPOINT,MODEL,PTTYPE,SERIAL,SIZE', '-P', '-b']).decode().rstrip().split('\n') + lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,MOUNTPOINT,MODEL,SERIAL,SIZE', '-P', '-b']).decode().rstrip().split('\n') os_device_names = [dict((map(lambda x: x.strip("\""), sub.split("="))) for sub in dev.split('\" ') if '=' in sub) for dev in lsblk_devices] os_device_names = [dev for dev in os_device_names if dev['TYPE'] in ['disk', 'part', 'lvm']] os_device_names.sort(key=lambda k: k['NAME']) @@ -228,7 +228,7 @@ def get_lsblk(self): class cLsblkMapper(cBlockDevMap): def __init__(self, **kwds): - super().__init__(**kwds) + super(cLsblkMapper, self).__init__(**kwds) self.device_map = self.get_lsblk() for os_device in self.device_map: @@ -237,7 +237,7 @@ def __init__(self, **kwds): class cGCPMapper(cBlockDevMap): def __init__(self, **kwds): - super().__init__(**kwds) + super(cGCPMapper, self).__init__(**kwds) self.device_map = self.get_lsblk() @@ -247,16 +247,17 @@ def __init__(self, **kwds): class cAwsMapper(cBlockDevMap): def __init__(self, **kwds): - super().__init__(**kwds) + super(cAwsMapper, self).__init__(**kwds) # Instance stores (AKA ephemeral volumes) do not appear to have a defined endpoint that maps between the /dev/sd[b-e] defined in the instance creation map, and the OS /dev/nvme[0-26]n1 device. # For this scenario, we can only return the instance stores in the order that they are defined. 
Because instance stores do not survive a poweroff and cannot be detached and reattached, the order doesn't matter as much. instance_store_map = [] - with urlopen("http://169.254.169.254/latest/meta-data/block-device-mapping/") as response__block_device_mapping: - block_device_mappings = response__block_device_mapping.read().decode().split("\n") - for block_device_mappings__ephemeral_id in [dev for dev in block_device_mappings if dev.startswith('ephemeral')]: - with urlopen("http://169.254.169.254/latest/meta-data/block-device-mapping/" + block_device_mappings__ephemeral_id) as response__ephemeral_device: - block_device_mappings__ephemeral_mapped = response__ephemeral_device.read().decode() - instance_store_map.append({'ephemeral_id': block_device_mappings__ephemeral_id, 'ephemeral_map': block_device_mappings__ephemeral_mapped}) + + response__block_device_mapping = urlopen('http://169.254.169.254/latest/meta-data/block-device-mapping/') + block_device_mappings = response__block_device_mapping.read().decode().split("\n") + for block_device_mappings__ephemeral_id in [dev for dev in block_device_mappings if dev.startswith('ephemeral')]: + with urlopen("http://169.254.169.254/latest/meta-data/block-device-mapping/" + block_device_mappings__ephemeral_id) as response__ephemeral_device: + block_device_mappings__ephemeral_mapped = response__ephemeral_device.read().decode() + instance_store_map.append({'ephemeral_id': block_device_mappings__ephemeral_id, 'ephemeral_map': block_device_mappings__ephemeral_mapped}) instance_store_count = 0 self.device_map = self.get_lsblk() diff --git a/config/tasks/disks_auto_aws_gcp.yml b/config/tasks/disks_auto_aws_gcp.yml index c30e4be9..beda3012 100644 --- a/config/tasks/disks_auto_aws_gcp.yml +++ b/config/tasks/disks_auto_aws_gcp.yml @@ -21,8 +21,11 @@ become: yes filesystem: fstype: "{{ item.fstype }}" - dev: "{{ (r__blockdevmap.device_map | selectattr('device_name_cloud', '==', item.device_name) | list | last)['device_name_os'] }}" + dev: "{{ _dev }}" loop: "{{auto_vols}}" + vars: + _dev: "{{ r__blockdevmap.device_map | json_query(\"[?device_name_cloud == `\" + item.device_name + \"` && TYPE==`disk` && FSTYPE==`` && MOUNTPOINT==``].device_name_os | [0]\") }}" + when: _dev is defined and _dev != '' - name: disks_auto_aws_gcp | Get the block device information (post-filesystem create), to get the block IDs for mounting blockdevmap: @@ -37,11 +40,14 @@ become: yes mount: path: "{{ item.mountpoint }}" - src: "UUID={{ (r__blockdevmap.device_map | selectattr('device_name_cloud', '==', item.device_name) | list | last)['UUID'] }}" + src: "UUID={{ _UUID }}" fstype: "{{ item.fstype }}" state: mounted opts: _netdev loop: "{{auto_vols}}" + vars: + _UUID: "{{ r__blockdevmap.device_map | json_query(\"[?device_name_cloud == `\" + item.device_name + \"` && TYPE==`disk` && MOUNTPOINT==``].UUID | [0]\") }}" + when: _UUID is defined and _UUID != '' - name: disks_auto_aws_gcp | change ownership of mountpoint (if set) become: yes diff --git a/config/tasks/disks_auto_generic.yml b/config/tasks/disks_auto_generic.yml index 33ae8746..9305fc1e 100644 --- a/config/tasks/disks_auto_generic.yml +++ b/config/tasks/disks_auto_generic.yml @@ -13,7 +13,7 @@ set_fact: hostvols: | {% set res = [] -%} - {% set tmp_blkvols = r__blockdevmap.device_map | selectattr('PTTYPE', '==', '') | selectattr('MOUNTPOINT', '==', '') | selectattr('TYPE', '==', 'disk') | list -%} + {% set tmp_blkvols = r__blockdevmap.device_map | selectattr('TYPE', '==', 'disk') | selectattr('MOUNTPOINT', '==', '') | list -%} {% 
set inventory_hostname__no_suffix = inventory_hostname | regex_replace('-(?!.*-).*') -%} {%- for autovol in cluster_hosts_target | to_json | from_json | json_query('[?starts_with(hostname, \'' + inventory_hostname__no_suffix + '\')].auto_volumes[]') -%} {%- set blkvolloop = namespace(break=false) -%} From 9c8a551e01d72347fe711f272d79b0a6484f7c36 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Tue, 3 Nov 2020 23:12:05 +0000 Subject: [PATCH 46/58] Update esxifree_guest.py: Set default disk.EnableUUID to "TRUE" so that the VMDK always presents a consistent UUID to the VM Update blockdevmap.py: Look up the partition type via udevadm, because the NVMe controller's 'Vendor' section (added by AWS) is returned for the disk as well as the disk's first partition, so we can't distinguish between an empty disk and an empty partition. --- _dependencies/library/blockdevmap.py | 13 +++++++++++-- _dependencies/library/esxifree_guest.py | 1 + config/tasks/disks_auto_aws_gcp.yml | 4 ++-- config/tasks/disks_auto_generic.yml | 2 +- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/_dependencies/library/blockdevmap.py b/_dependencies/library/blockdevmap.py index f227e218..ac6eba3e 100644 --- a/_dependencies/library/blockdevmap.py +++ b/_dependencies/library/blockdevmap.py @@ -209,7 +209,7 @@ class nvme_identify_controller(Structure): ("reserved3", c_uint8 * (704 - 531)), ("reserved4", c_uint8 * (2048 - 704)), ("psd", nvme_identify_controller_psd * 32), # Power State Descriptor - ("vs", nvme_identify_controller_amzn_vs)] # Vendor Specific + ("vs", nvme_identify_controller_amzn_vs)] # Vendor Specific. NOTE: AWS add the mapping here for both the root *and* the first partition. class cBlockDevMap(object): @@ -218,11 +218,20 @@ def __init__(self, module, **kwds): self.device_map = [] def get_lsblk(self): - # Get all existing block volumes by key=value, then parse this into a dictionary (which excludes non disk and partition block types, e.g. ram, loop). + # Get all existing block volumes by key=value, then parse this into a dictionary (which excludes non disk and partition block types, e.g. ram, loop). Cannot use the --json output as it not supported on older versions of lsblk (e.g. CentOS 7) lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,MOUNTPOINT,MODEL,SERIAL,SIZE', '-P', '-b']).decode().rstrip().split('\n') os_device_names = [dict((map(lambda x: x.strip("\""), sub.split("="))) for sub in dev.split('\" ') if '=' in sub) for dev in lsblk_devices] os_device_names = [dev for dev in os_device_names if dev['TYPE'] in ['disk', 'part', 'lvm']] os_device_names.sort(key=lambda k: k['NAME']) + + # Get the partition table type. Useful to know in case we are checking whether this block device is partition-less. Cannot use the PTTYPE option to lsblk above, as it is not supported in earlier versions of lsblk (e.g. 
CentOS7) + for os_device in os_device_names: + udevadm_output_lines = subprocess.check_output(['udevadm', 'info', '--query=property', '--name', os_device['NAME']]).decode().rstrip().split('\n') + udevadm_output = dict(s.split('=',1) for s in udevadm_output_lines) + if 'ID_PART_TABLE_TYPE' in udevadm_output: + os_device.update({"parttable_type": udevadm_output['ID_PART_TABLE_TYPE']}) + else: + os_device.update({"parttable_type": ""}) return os_device_names diff --git a/_dependencies/library/esxifree_guest.py b/_dependencies/library/esxifree_guest.py index 5523a089..d9a16062 100644 --- a/_dependencies/library/esxifree_guest.py +++ b/_dependencies/library/esxifree_guest.py @@ -513,6 +513,7 @@ class esxiFreeScraper(object): vmx_skeleton['tools.syncTime'] = "TRUE" vmx_skeleton['scsi0.virtualDev'] = "pvscsi" vmx_skeleton['scsi0.present'] = "TRUE" + vmx_skeleton['disk.enableuuid'] = "TRUE" def __init__(self, hostname, username='root', password=None, name=None, moid=None): self.soap_client = vmw_soap_client(host=hostname, username=username, password=password) diff --git a/config/tasks/disks_auto_aws_gcp.yml b/config/tasks/disks_auto_aws_gcp.yml index beda3012..4e946242 100644 --- a/config/tasks/disks_auto_aws_gcp.yml +++ b/config/tasks/disks_auto_aws_gcp.yml @@ -24,7 +24,7 @@ dev: "{{ _dev }}" loop: "{{auto_vols}}" vars: - _dev: "{{ r__blockdevmap.device_map | json_query(\"[?device_name_cloud == `\" + item.device_name + \"` && TYPE==`disk` && FSTYPE==`` && MOUNTPOINT==``].device_name_os | [0]\") }}" + _dev: "{{ r__blockdevmap.device_map | json_query(\"[?device_name_cloud == `\" + item.device_name + \"` && TYPE==`disk` && parttable_type==`` && FSTYPE==`` && MOUNTPOINT==``].device_name_os | [0]\") }}" when: _dev is defined and _dev != '' - name: disks_auto_aws_gcp | Get the block device information (post-filesystem create), to get the block IDs for mounting @@ -46,7 +46,7 @@ opts: _netdev loop: "{{auto_vols}}" vars: - _UUID: "{{ r__blockdevmap.device_map | json_query(\"[?device_name_cloud == `\" + item.device_name + \"` && TYPE==`disk` && MOUNTPOINT==``].UUID | [0]\") }}" + _UUID: "{{ r__blockdevmap.device_map | json_query(\"[?device_name_cloud == `\" + item.device_name + \"` && TYPE==`disk` && parttable_type==`` && MOUNTPOINT==``].UUID | [0]\") }}" when: _UUID is defined and _UUID != '' - name: disks_auto_aws_gcp | change ownership of mountpoint (if set) diff --git a/config/tasks/disks_auto_generic.yml b/config/tasks/disks_auto_generic.yml index 9305fc1e..0d64631b 100644 --- a/config/tasks/disks_auto_generic.yml +++ b/config/tasks/disks_auto_generic.yml @@ -13,7 +13,7 @@ set_fact: hostvols: | {% set res = [] -%} - {% set tmp_blkvols = r__blockdevmap.device_map | selectattr('TYPE', '==', 'disk') | selectattr('MOUNTPOINT', '==', '') | list -%} + {% set tmp_blkvols = r__blockdevmap.device_map | selectattr('TYPE', '==', 'disk') | selectattr('parttable_type', '==', '') | selectattr('MOUNTPOINT', '==', '') | list -%} {% set inventory_hostname__no_suffix = inventory_hostname | regex_replace('-(?!.*-).*') -%} {%- for autovol in cluster_hosts_target | to_json | from_json | json_query('[?starts_with(hostname, \'' + inventory_hostname__no_suffix + '\')].auto_volumes[]') -%} {%- set blkvolloop = namespace(break=false) -%} From 254de305c7dcc6c4e27e5fe961fdbb5ddfee48b3 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Sun, 31 Jan 2021 14:27:48 +0000 Subject: [PATCH 47/58] Update ansible_vault.py to load inline vault secrets. 
Update other licences --- _dependencies/action_plugins/ansible_vault.py | 87 +++++++++++++++---- _dependencies/library/blockdevmap.py | 1 + _dependencies/library/deprecate_str.py | 1 + _dependencies/library/esxifree_guest.py | 5 +- 4 files changed, 73 insertions(+), 21 deletions(-) diff --git a/_dependencies/action_plugins/ansible_vault.py b/_dependencies/action_plugins/ansible_vault.py index 54d61077..65e4cf4f 100644 --- a/_dependencies/action_plugins/ansible_vault.py +++ b/_dependencies/action_plugins/ansible_vault.py @@ -1,11 +1,54 @@ +# Copyright 2020 Dougal Seeley +# BSD 3-Clause License + from __future__ import (absolute_import, division, print_function) __metaclass__ = type +from ansible import constants as C from ansible.plugins.action import ActionBase -from ansible.parsing.vault import VaultLib, VaultSecret -import re +from ansible.parsing.vault import VaultLib, VaultSecret, parse_vaulttext_envelope, parse_vaulttext +from ansible.utils.display import Display + +display = Display() +################################# +# An action plugin to perform vault encrypt/decrypt operations inside a playbook. Can use either user-provided id/pass, or can use already-loaded vault secrets. +################################# +# +# - name: Encrypt using user-provided vaultid and vaultpass +# ansible_vault: +# vaultid: sandbox +# vaultpass: asdf +# plaintext: "sometext" +# action: encrypt +# register: r__ansible_vault_encrypt +# - debug: msg={{r__ansible_vault_encrypt}} +# +# - name: Decrypt using user-provided vaultid and vaultpass +# ansible_vault: +# vaultid: sandbox +# vaultpass: asdf +# vaulttext: "$ANSIBLE_VAULT;1.2;AES256;sandbox\n303562383536366435346466313764636533353438653463373765616365623130333633613139326235633064643338316665653531663030643139373131390a323233356239303864343336663238616535386638646566623036383130643638373465646331316664636564376161376137623432616561343631313262620a3561656131353364616136373866343963626561366236653538633734653165" +# action: decrypt +# register: r__ansible_vault_decrypt +# - debug: msg={{r__ansible_vault_decrypt}} +# +# - name: Encrypt using already-loaded vault secrets (from command-line, ansible.cfg etc) +# ansible_vault: +# plaintext: "sometext" +# action: encrypt +# register: r__ansible_vault_encrypt +# - debug: msg={{r__ansible_vault_encrypt}} +# +# - name: Decrypt using already-loaded vault secrets (from command-line, ansible.cfg etc) +# ansible_vault: +# vaulttext: "$ANSIBLE_VAULT;1.2;AES256;sandbox\n303562383536366435346466313764636533353438653463373765616365623130333633613139326235633064643338316665653531663030643139373131390a323233356239303864343336663238616535386638646566623036383130643638373465646331316664636564376161376137623432616561343631313262620a3561656131353364616136373866343963626561366236653538633734653165" +# action: decrypt +# register: r__ansible_vault_decrypt +# - debug: msg={{r__ansible_vault_decrypt}} +# +################################# class ActionModule(ActionBase): TRANSFERS_FILES = False @@ -14,35 +57,41 @@ def run(self, tmp=None, task_vars=None): if task_vars is None: task_vars = dict() - if 'vaultid' not in self._task.args or 'vaultpass' not in self._task.args or 'action' not in self._task.args: - return {"failed": True, "msg": "'vaultid' and 'vaultpass' and 'action' are required options"} - result = super(ActionModule, self).run(tmp, task_vars) del tmp # tmp is deprecated + # If user supplies vault-id and vault-pass, use them. 
Otherwise use those that are automatically loaded with the playbook + if 'vaultpass' in self._task.args: + oVaultSecret = VaultSecret(self._task.args["vaultpass"].encode('utf-8')) + if 'vaultid' in self._task.args: + oVaultLib = VaultLib([(self._task.args["vaultid"], oVaultSecret)]) + else: + display.v(u'No vault-id supplied, using default identity.') + oVaultLib = VaultLib([(C.DEFAULT_VAULT_IDENTITY, oVaultSecret)]) + else: + display.v(u'No vault-id or vault-pass supplied, using playbook-sourced variables.') + oVaultLib = self._loader._vault + if len(self._loader._vault.secrets) == 0: + display.warning("No Vault secrets loaded by config and none supplied to plugin. Vault operations are not possible.") + if self._task.args["action"] == "encrypt": if "plaintext" not in self._task.args: - return {"failed": True, "msg": "'plaintext' is required for encrypt"} - - # encrypt: - oVaultSecret = VaultSecret(self._task.args["vaultpass"].encode('utf-8')) - oVaultLib = VaultLib([(self._task.args["vaultid"], oVaultSecret)]) - vault_tag = oVaultLib.encrypt(self._task.args["plaintext"], oVaultSecret, self._task.args["vaultid"]) + return {"failed": True, "msg": "'plaintext' is required for encrypt."} - # reformat output - g_tag_value = re.match(r"^(?P
\$ANSIBLE_VAULT;(?P[\d\.]+?);(?P\w+?)(?:;(?P.*?))?)[\r\n](?P.*)$", vault_tag, flags=re.DOTALL) - res_cipherstr = re.sub(r'[ \n\r]', "", g_tag_value.group('vaulttext_raw'), flags=re.DOTALL) - res_vaulttext = g_tag_value.group('header') + "\n" + res_cipherstr + b_vaulttext = oVaultLib.encrypt(self._task.args["plaintext"]) + b_ciphertext, b_version, cipher_name, vault_id = parse_vaulttext_envelope(b_vaulttext) - result['msg'] = {"res_vaulttext": res_vaulttext, "plaintext": self._task.args["plaintext"]} + vaulttext_header = b_vaulttext.decode('utf-8').split('\n',1)[0] + result['vaulttext'] = vaulttext_header + "\n" + b_ciphertext.decode('utf-8') + result['plaintext'] = self._task.args["plaintext"] else: if "vaulttext" not in self._task.args: - return {"failed": True, "msg": "'vaulttext' is required for decrypt"} + return {"failed": True, "msg": "'vaulttext' is required for decrypt."} - oVaultLib = VaultLib([(self._task.args["vaultid"], VaultSecret(self._task.args["vaultpass"].encode('utf-8')))]) plaintext = oVaultLib.decrypt(self._task.args["vaulttext"]) - result['msg'] = {"res_vaulttext": self._task.args["vaulttext"], "plaintext": plaintext} + result['vaulttext'] = self._task.args["vaulttext"] + result['plaintext'] = plaintext result['failed'] = False diff --git a/_dependencies/library/blockdevmap.py b/_dependencies/library/blockdevmap.py index ac6eba3e..3549ec88 100644 --- a/_dependencies/library/blockdevmap.py +++ b/_dependencies/library/blockdevmap.py @@ -1,4 +1,5 @@ # Copyright 2020 Dougal Seeley +# BSD 3-Clause License # https://github.com/dseeley/blockdevmap # Copyright 2017 Amazon.com, Inc. and its affiliates. All Rights Reserved. diff --git a/_dependencies/library/deprecate_str.py b/_dependencies/library/deprecate_str.py index 1a3821da..09a58000 100644 --- a/_dependencies/library/deprecate_str.py +++ b/_dependencies/library/deprecate_str.py @@ -1,4 +1,5 @@ # Copyright 2020 Dougal Seeley +# BSD 3-Clause License from __future__ import (absolute_import, division, print_function) diff --git a/_dependencies/library/esxifree_guest.py b/_dependencies/library/esxifree_guest.py index d9a16062..e62fb36b 100644 --- a/_dependencies/library/esxifree_guest.py +++ b/_dependencies/library/esxifree_guest.py @@ -1,7 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# -# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) + +# Copyright 2020 Dougal Seeley +# BSD 3-Clause License from __future__ import absolute_import, division, print_function From 493b5dee18dd0657f977ae27e73afe0494e36f16 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Mon, 22 Feb 2021 07:51:17 +0000 Subject: [PATCH 48/58] Fix rollback with esxi_free guest when moving disks with _scheme_rmvm_keepdisk_rollback. Previously, when retrieving the existing cluster info, (for augmenting cluster_hosts_target with source disk), was not excluding the host with `lifecycle_state=current`, so was not reattaching the old disk. 
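
For reference, a minimal sketch of the filter this patch applies (the variable and label names are those used in the task file changed below; the debug task itself is illustrative only, not part of the patch):

```yaml
# Only consider disk-info results from the *old* VMs, i.e. those whose
# lifecycle_state label is no longer 'current', before merging their
# source-disk info into cluster_hosts_target.
- name: Show disk info for non-current (old) cluster members only
  debug:
    msg: "{{ r__vmware_guest_disk_info.results | selectattr('item.tagslabels.lifecycle_state', '!=', 'current') | list }}"
```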
--- .../_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml index d8e791e2..1717764d 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml @@ -22,9 +22,9 @@ cluster_hosts_target: | {%- for cht_host in cluster_hosts_target -%} {%- for cht_autovol in cht_host.auto_volumes -%} - {%- for chs_host_info_result in r__vmware_guest_disk_info.results -%} - {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == chs_host_info_result.item.name | regex_replace('-(?!.*-).*') -%} - {%- for chs_host_diskinfo in chs_host_info_result.guest_disk_info | to_json | from_json | json_query('*| [?unit_number!=`0` && backing_type==\'FlatVer2\' && contains(backing_filename, \'--' + cht_autovol.volname + '.vmdk\')]') -%} + {%- for vmware_guest_disk_info_result in r__vmware_guest_disk_info.results | selectattr('item.tagslabels.lifecycle_state', '!=', 'current')-%} + {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == vmware_guest_disk_info_result.item.name | regex_replace('-(?!.*-).*') -%} + {%- for chs_host_diskinfo in vmware_guest_disk_info_result.guest_disk_info | to_json | from_json | json_query('*| [?unit_number!=`0` && backing_type==\'FlatVer2\' && contains(backing_filename, \'--' + cht_autovol.volname + '.vmdk\')]') -%} {%- set _ = cht_autovol.update({'volume_size': (chs_host_diskinfo.capacity_in_bytes/1073741824)|int, 'src': {'backing_filename': chs_host_diskinfo.backing_filename, 'copy_or_move': _scheme_rmvm_keepdisk_rollback__copy_or_move }}) -%} {%- endfor -%} {%- endif -%} From f5fe35be85a5f71f3a5d533804691cbab996c86b Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Tue, 16 Mar 2021 22:02:00 +0000 Subject: [PATCH 49/58] Add Azure. + Refactor to allow easier addition of new cloud environments + Add new blockdevmap.py that supports Azure disks + Add assertion that either all LVM or all not-LVM disks are defined + Only _scheme_rmvm_keepdisk_rollback not supported yet. 
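
A minimal usage sketch for the new Azure support in blockdevmap (illustrative only; the lookup of LUN `0` is an assumed example, not taken from this patch):

```yaml
# On an Azure VM, each data disk's LUN is reported as device_name_cloud,
# which is what the auto_volumes 'device_name' values ("0", "1", ...) map to.
- name: Get block device map information for cloud
  blockdevmap:
    cloud_type: azure
  become: yes
  register: r__blockdevmap

- name: Show the OS device that Azure attached at LUN 0
  debug:
    msg: "{{ r__blockdevmap.device_map | json_query('[?device_name_cloud==`0`].device_name_os | [0]') }}"
```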
--- EXAMPLE/README.md | 33 +-- .../aws/testid/cluster_vars__clusterid.yml | 1 - .../testid/eu-west-1/cluster_vars__region.yml | 2 +- .../azure/cluster_vars__cloud.yml | 20 ++ .../azure/testid/cluster_vars__clusterid.yml | 26 +++ .../westeurope/cluster_vars__region.yml | 9 + .../mgmt/cluster_vars__buildenv.yml | 69 +++++++ EXAMPLE/cluster_defs/cluster_vars.yml | 1 + .../cluster_defs/gcp/cluster_vars__cloud.yml | 2 +- .../test_aws_euw1/cluster_vars.yml | 2 +- .../test_gcp_euw1/cluster_vars.yml | 2 +- EXAMPLE/clusterverse_label_upgrade_v1-v2.yml | 6 +- README.md | 32 ++- _dependencies/library/blockdevmap.py | 175 ++++++++++++++-- _dependencies/library/blockdevmap_LICENSE | 2 +- _dependencies/library/blockdevmap_README.md | 26 ++- _dependencies/tasks/main.yml | 4 + clean/tasks/aws.yml | 36 ++++ clean/tasks/azure.yml | 146 +++++++++++++ clean/tasks/clean_networking.yml | 31 --- clean/tasks/clean_vms.yml | 89 -------- clean/tasks/{clean_dns.yml => dns.yml} | 6 +- clean/tasks/esxifree.yml | 25 +++ clean/tasks/gcp.yml | 62 ++++++ clean/tasks/main.yml | 13 +- .../tasks/get_cluster_hosts_state.yml | 103 ---------- .../tasks/get_cluster_hosts_state_aws.yml | 17 ++ .../tasks/get_cluster_hosts_state_azure.yml | 41 ++++ .../get_cluster_hosts_state_esxifree.yml | 43 ++++ .../tasks/get_cluster_hosts_state_gcp.yml | 26 +++ .../tasks/get_cluster_hosts_target.yml | 113 +--------- .../tasks/get_cluster_hosts_target_aws.yml | 78 +++++++ .../get_cluster_hosts_target_esxifree.yml | 11 + .../tasks/get_cluster_hosts_target_gcp.yml | 14 ++ cluster_hosts/tasks/main.yml | 8 +- config/tasks/disks_auto_aws_gcp.yml | 154 -------------- config/tasks/disks_auto_aws_gcp_azure.yml | 173 ++++++++++++++++ config/tasks/main.yml | 8 +- create/tasks/aws.yml | 2 +- create/tasks/azure.yml | 194 ++++++++++++++++++ create/tasks/gcp.yml | 18 +- dynamic_inventory/tasks/azure.yml | 123 +++++++++++ jenkinsfiles/Jenkinsfile_testsuite | 2 +- readiness/tasks/main.yml | 2 +- readiness/tasks/remove_maintenance_mode.yml | 58 ------ .../tasks/remove_maintenance_mode_aws.yml | 13 ++ .../tasks/remove_maintenance_mode_azure.yml | 30 +++ .../remove_maintenance_mode_esxifree.yml | 11 + .../tasks/remove_maintenance_mode_gcp.yml | 29 +++ redeploy/__common/tasks/poweroff_vms.yml | 80 -------- redeploy/__common/tasks/poweroff_vms_aws.yml | 28 +++ .../__common/tasks/poweroff_vms_azure.yml | 33 +++ .../__common/tasks/poweroff_vms_esxifree.yml | 26 +++ redeploy/__common/tasks/poweroff_vms_gcp.yml | 30 +++ redeploy/__common/tasks/poweron_vms.yml | 58 ------ redeploy/__common/tasks/poweron_vms_aws.yml | 18 ++ redeploy/__common/tasks/poweron_vms_azure.yml | 31 +++ .../__common/tasks/poweron_vms_esxifree.yml | 16 ++ redeploy/__common/tasks/poweron_vms_gcp.yml | 30 +++ .../tasks/set_lifecycle_state_label.yml | 43 ---- .../tasks/set_lifecycle_state_label_aws.yml | 14 ++ .../tasks/set_lifecycle_state_label_azure.yml | 18 ++ .../set_lifecycle_state_label_esxifree.yml | 14 ++ .../tasks/set_lifecycle_state_label_gcp.yml | 16 ++ .../tasks/main.yml | 8 +- .../tasks/redeploy.yml | 4 +- .../tasks/rescue.yml | 6 +- .../tasks/main.yml | 16 +- .../tasks/redeploy_by_hosttype_by_host.yml | 2 +- .../tasks/rescue.yml | 8 +- .../tasks/by_hosttype_by_host.yml | 12 +- .../tasks/main.yml | 10 +- .../tasks/by_hosttype_by_host.yml | 4 +- 73 files changed, 1775 insertions(+), 841 deletions(-) create mode 100644 EXAMPLE/cluster_defs/azure/cluster_vars__cloud.yml create mode 100644 EXAMPLE/cluster_defs/azure/testid/cluster_vars__clusterid.yml create mode 100644 
EXAMPLE/cluster_defs/azure/testid/westeurope/cluster_vars__region.yml create mode 100644 EXAMPLE/cluster_defs/azure/testid/westeurope/mgmt/cluster_vars__buildenv.yml create mode 100644 clean/tasks/aws.yml create mode 100644 clean/tasks/azure.yml delete mode 100644 clean/tasks/clean_networking.yml delete mode 100644 clean/tasks/clean_vms.yml rename clean/tasks/{clean_dns.yml => dns.yml} (97%) create mode 100644 clean/tasks/esxifree.yml create mode 100644 clean/tasks/gcp.yml delete mode 100644 cluster_hosts/tasks/get_cluster_hosts_state.yml create mode 100644 cluster_hosts/tasks/get_cluster_hosts_state_aws.yml create mode 100644 cluster_hosts/tasks/get_cluster_hosts_state_azure.yml create mode 100644 cluster_hosts/tasks/get_cluster_hosts_state_esxifree.yml create mode 100644 cluster_hosts/tasks/get_cluster_hosts_state_gcp.yml create mode 100644 cluster_hosts/tasks/get_cluster_hosts_target_aws.yml create mode 100644 cluster_hosts/tasks/get_cluster_hosts_target_esxifree.yml create mode 100644 cluster_hosts/tasks/get_cluster_hosts_target_gcp.yml delete mode 100644 config/tasks/disks_auto_aws_gcp.yml create mode 100644 config/tasks/disks_auto_aws_gcp_azure.yml create mode 100644 create/tasks/azure.yml create mode 100644 dynamic_inventory/tasks/azure.yml delete mode 100644 readiness/tasks/remove_maintenance_mode.yml create mode 100644 readiness/tasks/remove_maintenance_mode_aws.yml create mode 100644 readiness/tasks/remove_maintenance_mode_azure.yml create mode 100644 readiness/tasks/remove_maintenance_mode_esxifree.yml create mode 100644 readiness/tasks/remove_maintenance_mode_gcp.yml delete mode 100644 redeploy/__common/tasks/poweroff_vms.yml create mode 100644 redeploy/__common/tasks/poweroff_vms_aws.yml create mode 100644 redeploy/__common/tasks/poweroff_vms_azure.yml create mode 100644 redeploy/__common/tasks/poweroff_vms_esxifree.yml create mode 100644 redeploy/__common/tasks/poweroff_vms_gcp.yml delete mode 100644 redeploy/__common/tasks/poweron_vms.yml create mode 100644 redeploy/__common/tasks/poweron_vms_aws.yml create mode 100644 redeploy/__common/tasks/poweron_vms_azure.yml create mode 100644 redeploy/__common/tasks/poweron_vms_esxifree.yml create mode 100644 redeploy/__common/tasks/poweron_vms_gcp.yml delete mode 100644 redeploy/__common/tasks/set_lifecycle_state_label.yml create mode 100644 redeploy/__common/tasks/set_lifecycle_state_label_aws.yml create mode 100644 redeploy/__common/tasks/set_lifecycle_state_label_azure.yml create mode 100644 redeploy/__common/tasks/set_lifecycle_state_label_esxifree.yml create mode 100644 redeploy/__common/tasks/set_lifecycle_state_label_gcp.yml diff --git a/EXAMPLE/README.md b/EXAMPLE/README.md index 8f68fcb7..25776033 100644 --- a/EXAMPLE/README.md +++ b/EXAMPLE/README.md @@ -17,17 +17,22 @@ The `cluster.yml` sub-role immutably deploys a cluster from the config defined a ### AWS: ``` -ansible-playbook -u ubuntu --private-key=/home//.ssh/ cluster.yml -e buildenv=sandbox -e cloud_type=aws -e region=eu-west-1 -e clusterid=test --vault-id=sandbox@.vaultpass-client.py -ansible-playbook -u ubuntu --private-key=/home//.ssh/ cluster.yml -e buildenv=sandbox -e cloud_type=aws -e region=eu-west-1 -e clusterid=test --vault-id=sandbox@.vaultpass-client.py --tags=clusterverse_clean -e clean=_all_ -ansible-playbook -u ubuntu --private-key=/home//.ssh/ cluster.yml -e buildenv=sandbox -e clusterid=test_aws_euw1 --vault-id=sandbox@.vaultpass-client.py -ansible-playbook -u ubuntu --private-key=/home//.ssh/ cluster.yml -e buildenv=sandbox -e clusterid=test_aws_euw1 
--vault-id=sandbox@.vaultpass-client.py --tags=clusterverse_clean -e clean=_all_ +ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=testid -e cloud_type=aws -e region=eu-west-1 --vault-id=sandbox@.vaultpass-client.py +ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=testid -e cloud_type=aws -e region=eu-west-1 --vault-id=sandbox@.vaultpass-client.py --tags=clusterverse_clean -e clean=_all_ +ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=test_aws_euw1 --vault-id=sandbox@.vaultpass-client.py +ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=test_aws_euw1 --vault-id=sandbox@.vaultpass-client.py --tags=clusterverse_clean -e clean=_all_ ``` ### GCP: ``` -ansible-playbook -u --private-key=/home//.ssh/ cluster.yml -e buildenv=sandbox -e clusterid=test -e cloud_type=gcp -e region=europe-west1 --vault-id=sandbox@.vaultpass-client.py -ansible-playbook -u --private-key=/home//.ssh/ cluster.yml -e buildenv=sandbox -e clusterid=test -e cloud_type=gcp -e region=europe-west1 --vault-id=sandbox@.vaultpass-client.py --tags=clusterverse_clean -e clean=_all_ -ansible-playbook -u --private-key=/home//.ssh/ cluster.yml -e buildenv=sandbox -e clusterid=test_gcp_euw1 --vault-id=sandbox@.vaultpass-client.py -ansible-playbook -u --private-key=/home//.ssh/ cluster.yml -e buildenv=sandbox -e clusterid=test_gcp_euw1 --vault-id=sandbox@.vaultpass-client.py --tags=clusterverse_clean -e clean=_all_ +ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=testid -e cloud_type=gcp -e region=europe-west1 --vault-id=sandbox@.vaultpass-client.py +ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=testid -e cloud_type=gcp -e region=europe-west1 --vault-id=sandbox@.vaultpass-client.py --tags=clusterverse_clean -e clean=_all_ +ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=test_gcp_euw1 --vault-id=sandbox@.vaultpass-client.py +ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=test_gcp_euw1 --vault-id=sandbox@.vaultpass-client.py --tags=clusterverse_clean -e clean=_all_ +``` +### Azure: +``` +ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=testid -e cloud_type=azure -e region=westeurope --vault-id=sandbox@.vaultpass-client.py +ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=testid -e cloud_type=azure -e region=westeurope --vault-id=sandbox@.vaultpass-client.py --tags=clusterverse_clean -e clean=_all_ ``` ### Mandatory command-line variables: @@ -62,13 +67,17 @@ The `redeploy.yml` sub-role will completely redeploy the cluster; this is useful ### AWS: ``` -ansible-playbook -u ubuntu --private-key=/home//.ssh/ redeploy.yml -e buildenv=sandbox -e clusterid=test_aws_euw1 --vault-id=sandbox@.vaultpass-client.py -e canary=none -ansible-playbook -u ubuntu --private-key=/home//.ssh/ redeploy.yml -e buildenv=sandbox -e cloud_type=aws -e region=eu-west-1 -e clusterid=test --vault-id=sandbox@.vaultpass-client.py -e canary=none +ansible-playbook redeploy.yml -e buildenv=sandbox -e cloud_type=aws -e region=eu-west-1 -e clusterid=test --vault-id=sandbox@.vaultpass-client.py -e canary=none +ansible-playbook redeploy.yml -e buildenv=sandbox -e clusterid=test_aws_euw1 --vault-id=sandbox@.vaultpass-client.py -e canary=none ``` ### GCP: ``` -ansible-playbook -u --private-key=/home//.ssh/ redeploy.yml -e buildenv=sandbox -e clusterid=test_aws_euw1 --vault-id=sandbox@.vaultpass-client.py -e canary=none -ansible-playbook -u --private-key=/home//.ssh/ redeploy.yml -e buildenv=sandbox -e clusterid=test -e cloud_type=gcp -e 
region=europe-west1 --vault-id=sandbox@.vaultpass-client.py -e canary=none +ansible-playbook redeploy.yml -e buildenv=sandbox -e clusterid=test -e cloud_type=gcp -e region=europe-west1 --vault-id=sandbox@.vaultpass-client.py -e canary=none +ansible-playbook redeploy.yml -e buildenv=sandbox -e clusterid=test_aws_euw1 --vault-id=sandbox@.vaultpass-client.py -e canary=none +``` +### Azure: +``` +ansible-playbook redeploy.yml -e buildenv=sandbox -e clusterid=test -e cloud_type=azure -e region=westeurope --vault-id=sandbox@.vaultpass-client.py -e canary=none ``` ### Mandatory command-line variables: diff --git a/EXAMPLE/cluster_defs/aws/testid/cluster_vars__clusterid.yml b/EXAMPLE/cluster_defs/aws/testid/cluster_vars__clusterid.yml index 42b49950..4156457b 100644 --- a/EXAMPLE/cluster_defs/aws/testid/cluster_vars__clusterid.yml +++ b/EXAMPLE/cluster_defs/aws/testid/cluster_vars__clusterid.yml @@ -18,7 +18,6 @@ beats_config: cluster_vars: dns_nameserver_zone: &dns_nameserver_zone "" # The zone that dns_server will operate on. gcloud dns needs a trailing '.'. Leave blank if no external DNS (use IPs only) dns_user_domain: "{%- if _dns_nameserver_zone -%}{{cloud_type}}-{{region}}.{{app_class}}.{{buildenv}}.{{_dns_nameserver_zone}}{%- endif -%}" # A user-defined _domain_ part of the FDQN, (if more prefixes are required before the dns_nameserver_zone) - dns_server: "" # Specify DNS server. nsupdate, route53 or clouddns. If empty string is specified, no DNS will be added. instance_profile_name: "" custom_tagslabels: inv_resident_id: "myresident" diff --git a/EXAMPLE/cluster_defs/aws/testid/eu-west-1/cluster_vars__region.yml b/EXAMPLE/cluster_defs/aws/testid/eu-west-1/cluster_vars__region.yml index 69e1389e..21773b54 100644 --- a/EXAMPLE/cluster_defs/aws/testid/eu-west-1/cluster_vars__region.yml +++ b/EXAMPLE/cluster_defs/aws/testid/eu-west-1/cluster_vars__region.yml @@ -1,5 +1,5 @@ --- cluster_vars: - image: "ami-04ffbabc7935ec0e9" # eu-west-1, ubuntu, 20.04, amd64, hvm-ssd, 20210108. Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ + image: "ami-0dd0f5f97a21a8fe9" # eu-west-1, ubuntu, 20.04, amd64, hvm-ssd, 20210315. Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ # image: "ami-0b850cf02cc00fdc8" # eu-west-1, CentOS7 diff --git a/EXAMPLE/cluster_defs/azure/cluster_vars__cloud.yml b/EXAMPLE/cluster_defs/azure/cluster_vars__cloud.yml new file mode 100644 index 00000000..e8a6e713 --- /dev/null +++ b/EXAMPLE/cluster_defs/azure/cluster_vars__cloud.yml @@ -0,0 +1,20 @@ +--- + +redeploy_schemes_supported: ['_scheme_addallnew_rmdisk_rollback', '_scheme_addnewvm_rmdisk_rollback', '_scheme_rmvm_rmdisk_only'] # TODO: support _scheme_rmvm_keepdisk_rollback + +cluster_vars: + dns_cloud_internal_domain: "ACCOUNTNAME_CHANGEME.onmicrosoft.com" # The cloud-internal zone as defined by the cloud provider (e.g. GCP, AWS) + dns_server: "" # Specify DNS server. nsupdate, route53 or clouddns. If empty string is specified, no DNS will be added. + assign_public_ip: "yes" + inventory_ip: "public" # 'public' or 'private', (private in case we're operating in a private LAN). 
If public, 'assign_public_ip' must be 'yes' + user_data: |- + #cloud-config + system_info: + default_user: + name: ansible + rules: + - name: "SSHExternal" + priority: "100" + protocol: "Tcp" + destination_port_range: ["22"] + source_address_prefix: "{{_ssh_whitelist}}" diff --git a/EXAMPLE/cluster_defs/azure/testid/cluster_vars__clusterid.yml b/EXAMPLE/cluster_defs/azure/testid/cluster_vars__clusterid.yml new file mode 100644 index 00000000..4156457b --- /dev/null +++ b/EXAMPLE/cluster_defs/azure/testid/cluster_vars__clusterid.yml @@ -0,0 +1,26 @@ +--- + +prometheus_node_exporter_install: false +filebeat_install: false +metricbeat_install: false + +beats_config: + filebeat: +# output_logstash_hosts: ["localhost:5044"] # The destination hosts for filebeat-gathered logs +# extra_logs_paths: # The array is optional, if you need to add more paths or files to scrape for logs +# - /var/log/myapp/*.log + metricbeat: +# output_logstash_hosts: ["localhost:5044"] # The destination hosts for metricbeat-gathered metrics +# diskio: # Diskio retrieves metrics for all disks partitions by default. When diskio.include_devices is defined, only look for defined partitions +# include_devices: ["sda", "sdb", "nvme0n1", "nvme1n1", "nvme2n1"] + + +cluster_vars: + dns_nameserver_zone: &dns_nameserver_zone "" # The zone that dns_server will operate on. gcloud dns needs a trailing '.'. Leave blank if no external DNS (use IPs only) + dns_user_domain: "{%- if _dns_nameserver_zone -%}{{cloud_type}}-{{region}}.{{app_class}}.{{buildenv}}.{{_dns_nameserver_zone}}{%- endif -%}" # A user-defined _domain_ part of the FDQN, (if more prefixes are required before the dns_nameserver_zone) + instance_profile_name: "" + custom_tagslabels: + inv_resident_id: "myresident" + inv_proposition_id: "myproposition" + inv_cost_centre: "0000000000" +_dns_nameserver_zone: *dns_nameserver_zone diff --git a/EXAMPLE/cluster_defs/azure/testid/westeurope/cluster_vars__region.yml b/EXAMPLE/cluster_defs/azure/testid/westeurope/cluster_vars__region.yml new file mode 100644 index 00000000..36d67fba --- /dev/null +++ b/EXAMPLE/cluster_defs/azure/testid/westeurope/cluster_vars__region.yml @@ -0,0 +1,9 @@ +--- + +_ubuntu2004image: { "publisher": "canonical", "offer": "0001-com-ubuntu-server-focal", "sku": "20_04-lts-gen2", "version": "latest" } +_ubuntu1804image: { "publisher": "canonical", "offer": "UbuntuServer", "sku": "18_04-lts-gen2", "version": "latest" } +_centos7image: { "publisher": "OpenLogic", "offer": "CentOS", "sku": "7_9-gen2", "version": "latest" } + +cluster_vars: + image: "{{_ubuntu2004image}}" + diff --git a/EXAMPLE/cluster_defs/azure/testid/westeurope/mgmt/cluster_vars__buildenv.yml b/EXAMPLE/cluster_defs/azure/testid/westeurope/mgmt/cluster_vars__buildenv.yml new file mode 100644 index 00000000..9431e802 --- /dev/null +++ b/EXAMPLE/cluster_defs/azure/testid/westeurope/mgmt/cluster_vars__buildenv.yml @@ -0,0 +1,69 @@ +--- + +cluster_vars: + sandbox: + azure_subscription_id: !vault | + $ANSIBLE_VAULT;1.2;AES256;sandbox + 7669080460651349243347331538721104778691266429457726036813912140404310 + azure_client_id: !vault | + $ANSIBLE_VAULT;1.2;AES256;sandbox + 7669080460651349243347331538721104778691266429457726036813912140404310 + azure_secret: !vault | + $ANSIBLE_VAULT;1.2;AES256;sandbox + 7669080460651349243347331538721104778691266429457726036813912140404310 + azure_tenant: !vault | + $ANSIBLE_VAULT;1.2;AES256;sandbox + 7669080460651349243347331538721104778691266429457726036813912140404310 + ssh_connection_cfg: + host: 
&host_ssh_connection_cfg + ansible_user: "ansible" + ansible_ssh_private_key_file: !vault | + $ANSIBLE_VAULT;1.2;AES256;sandbox + 7669080460651349243347331538721104778691266429457726036813912140404310 + bastion: + ssh_args: '-o ProxyCommand="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ./id_rsa_bastion -W %h:%p -q user@192.168.0.1"' + ssh_priv_key: !vault | + $ANSIBLE_VAULT;1.2;AES256;sandbox + 7669080460651349243347331538721104778691266429457726036813912140404310 + azure_resource_group: "compute" + vnet_name: "{{buildenv}}" + vpc_subnet_name_prefix: "{{buildenv}}-test-{{region}}" +# nsupdate_cfg: {server: "", key_name: "", key_secret: ""} # If you're using bind9 (or other nsupdate-compatible 'dns_server') + + hosttype_vars: + sys: + auto_volumes: [ ] + flavor: Standard_B1ls + version: "{{sys_version | default('')}}" + vms_by_az: { 1: 1, 2: 0, 3: 0 } + +# sysdisks2: +# auto_volumes: +# - { device_name: "0", disk_size_gb: 1, storage_account_type: "StandardSSD_LRS", mountpoint: "/media/mysvc0", fstype: "ext4", caching: "ReadOnly", perms: { owner: "root", group: "root", mode: "775" } } +# - { device_name: "1", disk_size_gb: 1, storage_account_type: "StandardSSD_LRS", mountpoint: "/media/mysvc1", fstype: "ext4", caching: "ReadOnly" } +# flavor: Standard_B1ls +# os_disk_size_gb: "35" # This is optional, and if set, MUST be bigger than the original image size (e.g. 30GB for Ubuntu2004) +# version: "{{sysdisks_version | default('')}}" +# vms_by_az: { 1: 1, 2: 0, 3: 0 } + +# sysdisks2lvm: +# auto_volumes: +# - { device_name: "0", disk_size_gb: 1, storage_account_type: "StandardSSD_LRS", mountpoint: "/media/mysvc0", fstype: "ext4", caching: "ReadOnly" } +# - { device_name: "1", disk_size_gb: 1, storage_account_type: "StandardSSD_LRS", mountpoint: "/media/mysvc0", fstype: "ext4", caching: "ReadOnly" } +# lvmparams: {vg_name: vg0, lv_name: lv0, lv_size: +100%FREE} +# flavor: Standard_B1ls +# os_disk_size_gb: "35" # This is optional, and if set, MUST be bigger than the original image size (e.g. 30GB for Ubuntu2004) +# version: "{{sysdisks_version | default('')}}" +# vms_by_az: { 1: 1, 2: 0, 3: 0 } + +# sysdisks4: +# auto_volumes: +# - { device_name: "3", disk_size_gb: 1, storage_account_type: "StandardSSD_LRS", mountpoint: "/media/mysvc3", fstype: "ext4", caching: "ReadOnly" } +# - { device_name: "1", disk_size_gb: 1, storage_account_type: "StandardSSD_LRS", mountpoint: "/media/mysvc1", fstype: "ext4", caching: "ReadOnly" } +# - { device_name: "0", disk_size_gb: 1, storage_account_type: "StandardSSD_LRS", mountpoint: "/media/mysvc0", fstype: "ext4", caching: "ReadOnly" } +# - { device_name: "2", disk_size_gb: 1, storage_account_type: "StandardSSD_LRS", mountpoint: "/media/mysvc2", fstype: "ext4", caching: "ReadOnly" } +# flavor: Standard_B2s # B1ls only supports 2 disks (B2s supports 4) +# version: "{{sysdisks_version | default('')}}" +# vms_by_az: { 1: 1, 2: 1, 3: 0 } + +_host_ssh_connection_cfg: { <<: *host_ssh_connection_cfg } diff --git a/EXAMPLE/cluster_defs/cluster_vars.yml b/EXAMPLE/cluster_defs/cluster_vars.yml index 072a5d84..76a9eacc 100644 --- a/EXAMPLE/cluster_defs/cluster_vars.yml +++ b/EXAMPLE/cluster_defs/cluster_vars.yml @@ -8,6 +8,7 @@ redeploy_schemes_supported: ['_scheme_addallnew_rmdisk_rollback', '_scheme_addne #redeploy_scheme: _scheme_rmvm_keepdisk_rollback skip_dynamic_inventory_sshwait: true +test_touch_disks: true app_name: "{{lookup('pipe', 'whoami')}}-test" # The name of the application cluster (e.g. 'couchbase', 'nginx'); becomes part of cluster_name. 
Provided is a default to ensure no accidental overwriting. app_class: "test" # The class of application (e.g. 'database', 'webserver'); becomes part of the fqdn diff --git a/EXAMPLE/cluster_defs/gcp/cluster_vars__cloud.yml b/EXAMPLE/cluster_defs/gcp/cluster_vars__cloud.yml index 265cde54..deef36a8 100644 --- a/EXAMPLE/cluster_defs/gcp/cluster_vars__cloud.yml +++ b/EXAMPLE/cluster_defs/gcp/cluster_vars__cloud.yml @@ -1,7 +1,7 @@ --- cluster_vars: - image: "projects/ubuntu-os-cloud/global/images/ubuntu-2004-focal-v20210112" # Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ + image: "projects/ubuntu-os-cloud/global/images/ubuntu-2004-focal-v20210315" # Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ # image: "projects/ubuntu-os-cloud/global/images/centos-7-v20201216 dns_cloud_internal_domain: "c.{{ (_gcp_service_account_rawtext | string | from_json).project_id }}.internal" # The cloud-internal zone as defined by the cloud provider (e.g. GCP, AWS) dns_server: "clouddns" # Specify DNS server. nsupdate, route53 or clouddns. If empty string is specified, no DNS will be added. diff --git a/EXAMPLE/cluster_defs/test_aws_euw1/cluster_vars.yml b/EXAMPLE/cluster_defs/test_aws_euw1/cluster_vars.yml index f3e071a5..0a1cf7a2 100644 --- a/EXAMPLE/cluster_defs/test_aws_euw1/cluster_vars.yml +++ b/EXAMPLE/cluster_defs/test_aws_euw1/cluster_vars.yml @@ -50,7 +50,7 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within cluster_vars: type: &cloud_type "aws" - image: "ami-04ffbabc7935ec0e9" # eu-west-1, ubuntu, 20.04, amd64, hvm-ssd, 20210108. Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ + image: "ami-0dd0f5f97a21a8fe9" # eu-west-1, ubuntu, 20.04, amd64, hvm-ssd, 20210315. Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ # image: "ami-0b850cf02cc00fdc8" # eu-west-1, CentOS7 region: ®ion "eu-west-1" dns_cloud_internal_domain: "{{_region}}.compute.internal" # The cloud-internal zone as defined by the cloud provider (e.g. GCP, AWS) diff --git a/EXAMPLE/cluster_defs/test_gcp_euw1/cluster_vars.yml b/EXAMPLE/cluster_defs/test_gcp_euw1/cluster_vars.yml index c7a8bbfc..a843c541 100644 --- a/EXAMPLE/cluster_defs/test_gcp_euw1/cluster_vars.yml +++ b/EXAMPLE/cluster_defs/test_gcp_euw1/cluster_vars.yml @@ -50,7 +50,7 @@ cluster_name: "{{app_name}}-{{buildenv}}" # Identifies the cluster within cluster_vars: type: &cloud_type "gcp" - image: "projects/ubuntu-os-cloud/global/images/ubuntu-2004-focal-v20210112" # Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ + image: "projects/ubuntu-os-cloud/global/images/ubuntu-2004-focal-v20210315" # Ubuntu images can be located at https://cloud-images.ubuntu.com/locator/ # image: "projects/ubuntu-os-cloud/global/images/centos-7-v20201216 region: ®ion "europe-west1" dns_cloud_internal_domain: "c.{{ (_gcp_service_account_rawtext | string | from_json).project_id }}.internal" # The cloud-internal zone as defined by the cloud provider (e.g. 
GCP, AWS) diff --git a/EXAMPLE/clusterverse_label_upgrade_v1-v2.yml b/EXAMPLE/clusterverse_label_upgrade_v1-v2.yml index 18e2f32b..65dd512c 100644 --- a/EXAMPLE/clusterverse_label_upgrade_v1-v2.yml +++ b/EXAMPLE/clusterverse_label_upgrade_v1-v2.yml @@ -5,12 +5,12 @@ connection: local gather_facts: true tasks: - - import_role: + - include: name: 'clusterverse/_dependencies' - - import_role: + - include_role: name: 'clusterverse/cluster_hosts' - tasks_from: get_cluster_hosts_state.yml + tasks_from: "get_cluster_hosts_state_{{cluster_vars.type}}.yml" - block: - name: clusterverse_label_upgrade_v1-v2 | Add lifecycle_state and cluster_suffix label to AWS EC2 VM diff --git a/README.md b/README.md index 167c6726..931215f9 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ # clusterverse   [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) ![PRs Welcome](https://img.shields.io/badge/PRs-Welcome-brightgreen.svg) A full-lifecycle, immutable cloud infrastructure cluster management **role**, using Ansible. -+ **Multi-cloud:** clusterverse can manage cluster lifecycle in AWS and GCP ++ **Multi-cloud:** clusterverse can manage cluster lifecycle in AWS, GCP and Azure + **Deploy:** You define your infrastructure as code (in Ansible yaml), and clusterverse will deploy it + **Scale-up:** If you change the cluster definitions and rerun the deploy, new nodes will be added. -+ **Redeploy (e.g. up-version):** If you need to up-version, the `redeploy.yml` playbook will replace each node in turn, (with optional callbacks), and rollback if any failures occur. ++ **Redeploy (e.g. up-version):** If you need to up-version, or replace the underlying OS, (i.e. to achieve fully immutable, zero-patching redeploys), the `redeploy.yml` playbook will replace each node in the cluster (via various redeploy schemes), and rollback if any failures occur. **clusterverse** is designed to manage base-vm infrastructure that underpins cluster-based infrastructure, for example, Couchbase, Kafka, Elasticsearch, or Cassandra. @@ -22,22 +22,33 @@ To active the pipenv: ### AWS + AWS account with IAM rights to create EC2 VMs and security groups in the chosen VPCs/subnets. Place the credentials in: - + `cluster_vars//aws_access_key:` - + `cluster_vars//aws_secret_key:` + + `cluster_vars[buildenv].aws_access_key:` + + `cluster_vars[buildenv].aws_secret_key:` + Preexisting VPCs: - + `cluster_vars//vpc_name: my-vpc-{{buildenv}}` + + `cluster_vars[buildenv].vpc_name: my-vpc-{{buildenv}}` + Preexisting subnets. This is a prefix - the cloud availability zone will be appended to the end (e.g. `a`, `b`, `c`). - + `cluster_vars//vpc_subnet_name_prefix: my-subnet-{{region}}` + + `cluster_vars[buildenv].vpc_subnet_name_prefix: my-subnet-{{region}}` + Preexisting keys (in AWS IAM): - + `cluster_vars//key_name: my_key__id_rsa` + + `cluster_vars[buildenv].key_name: my_key__id_rsa` ### GCP + Create a gcloud account. + Create a service account in `IAM & Admin` / `Service Accounts`. Download the json file locally. -+ Store the contents within the `cluster_vars/gcp_service_account_rawtext` variable. ++ Store the contents within the `cluster_vars[buildenv].gcp_service_account_rawtext` variable. + During execution, the json file will be copied locally because the Ansible GCP modules often require the file as input. + Google Cloud SDK needs to be installed to run gcloud command-line (e.g. 
to disable delete protection) - this is handled by `pipenv install` +### Azure ++ Create an Azure account. ++ Create a Tenant and a Subscription ++ Create a Resource group and networks/subnetworks within that. ++ Create a service principal - add the credentials to: + + `cluster_vars[buildenv].azure_subscription_id` + + `cluster_vars[buildenv].azure_client_id` + + `cluster_vars[buildenv].azure_secret` + + `cluster_vars[buildenv].azure_tenant` + + ### DNS DNS is optional. If unset, no DNS names will be created. If DNS is required, you will need a DNS zone delegated to one of the following: + nsupdate (e.g. bind9) @@ -183,7 +194,7 @@ The role is designed to run in two modes: + The `redeploy.yml` sub-role will completely redeploy the cluster; this is useful for example to upgrade the underlying operating system version. + It supports `canary` deploys. The `canary` extra variable must be defined on the command line set to one of: `start`, `finish`, `none` or `tidy`. + It contains callback hooks: - + `mainclusteryml`: This is the name of the deployment playbook. It is called to rollback a failed deployment. It should be set to the value of the primary _deploy_ playbook yml (e.g. `cluster.yml`) + + `mainclusteryml`: This is the name of the deployment playbook. It is called to deploy nodes for the new cluster, or to rollback a failed deployment. It should be set to the value of the primary _deploy_ playbook yml (e.g. `cluster.yml`) + `predeleterole`: This is the name of a role that should be called prior to deleting VMs; it is used for example to eject nodes from a Couchbase cluster. It takes a list of `hosts_to_remove` VMs. + It supports pluggable redeployment schemes. The following are provided: + **_scheme_rmvm_rmdisk_only** @@ -217,7 +228,7 @@ The role is designed to run in two modes: + To delete the old VMs, either set '-e canary_tidy_on_success=true', or call redeploy.yml with '-e canary=tidy' + **_scheme_rmvm_keepdisk_rollback** + Redeploys the nodes one by one, and moves the secondary (non-root) disks from the old to the new (note, only non-ephemeral disks can be moved). - + _Cluster topology must remain identical. More disks may be added, but none may change or be removed._ + + _Cluster node topology must remain identical. More disks may be added, but none may change or be removed._ + **It assumes a resilient deployment (it can tolerate one node being removed from the cluster).** + For each node in the cluster: + Run `predeleterole` @@ -228,3 +239,4 @@ The role is designed to run in two modes: + If `canary=start`, only the first node is redeployed. If `canary=finish`, only the remaining (non-first), nodes are replaced. If `canary=none`, all nodes are redeployed. + If the process fails for any reason, the old VMs are reinstated (and the disks reattached to the old nodes), and the new VMs are stopped (rollback) + To delete the old VMs, either set '-e canary_tidy_on_success=true', or call redeploy.yml with '-e canary=tidy' + + (Azure functionality coming soon) diff --git a/_dependencies/library/blockdevmap.py b/_dependencies/library/blockdevmap.py index 3549ec88..b6a8eeab 100644 --- a/_dependencies/library/blockdevmap.py +++ b/_dependencies/library/blockdevmap.py @@ -18,22 +18,16 @@ version_added: 1.0.0 short_description: blockdevmap description: - - Map the block device name as defined in AWS/GCP (e.g. /dev/sdf) with the volume provided to the OS + - Map the block device name as defined in AWS/GCP/Azure (e.g. 
/dev/sdf) with the volume provided to the OS authors: - Dougal Seeley - Amazon.com Inc. ''' EXAMPLES = ''' -- name: Get block device map information for GCP +- name: Get block device map information for cloud blockdevmap: - cloud_type: gcp - become: yes - register: r__blockdevmap - -- name: Get block device map information for AWS - blockdevmap: - cloud_type: aws + cloud_type: become: yes register: r__blockdevmap @@ -48,6 +42,7 @@ ''' RETURN = ''' +## AWS Nitro "device_map": [ { "FSTYPE": "ext4", @@ -86,6 +81,7 @@ "volume_id": "vol-0b05e48d5677db81a" } +## AWS non-Nitro "device_map": [ { "FSTYPE": "", @@ -109,11 +105,142 @@ "device_name_cloud": "/dev/sda1", "device_name_os": "/dev/xvda1" } + +## AZURE +"device_map": [ + { + "FSTYPE": "", + "HCTL": "0:0:0:0", + "MODEL": "Virtual Disk", + "MOUNTPOINT": "", + "NAME": "sda", + "SERIAL": "6002248071748569390b23178109d35e", + "SIZE": "32212254720", + "TYPE": "disk", + "UUID": "", + "device_name_cloud": "ROOTDISK", + "device_name_os": "/dev/sda", + "parttable_type": "gpt" + }, + { + "FSTYPE": "xfs", + "HCTL": "", + "MODEL": "", + "MOUNTPOINT": "/boot", + "NAME": "sda1", + "SERIAL": "", + "SIZE": "524288000", + "TYPE": "part", + "UUID": "8bd4ad1d-13a7-4bb1-a40c-b05444f11db3", + "device_name_cloud": "", + "device_name_os": "/dev/sda1", + "parttable_type": "gpt" + }, + { + "FSTYPE": "", + "HCTL": "", + "MODEL": "", + "MOUNTPOINT": "", + "NAME": "sda14", + "SERIAL": "", + "SIZE": "4194304", + "TYPE": "part", + "UUID": "", + "device_name_cloud": "", + "device_name_os": "/dev/sda14", + "parttable_type": "gpt" + }, + { + "FSTYPE": "vfat", + "HCTL": "", + "MODEL": "", + "MOUNTPOINT": "/boot/efi", + "NAME": "sda15", + "SERIAL": "", + "SIZE": "519045632", + "TYPE": "part", + "UUID": "F5EB-013D", + "device_name_cloud": "", + "device_name_os": "/dev/sda15", + "parttable_type": "gpt" + }, + { + "FSTYPE": "xfs", + "HCTL": "", + "MODEL": "", + "MOUNTPOINT": "/", + "NAME": "sda2", + "SERIAL": "", + "SIZE": "31161581568", + "TYPE": "part", + "UUID": "40a878b6-3fe8-4336-820a-951a19f79a76", + "device_name_cloud": "", + "device_name_os": "/dev/sda2", + "parttable_type": "gpt" + }, + { + "FSTYPE": "", + "HCTL": "0:0:0:1", + "MODEL": "Virtual Disk", + "MOUNTPOINT": "", + "NAME": "sdb", + "SERIAL": "60022480c891da018bdd14b5dd1895b0", + "SIZE": "4294967296", + "TYPE": "disk", + "UUID": "", + "device_name_cloud": "RESOURCEDISK", + "device_name_os": "/dev/sdb", + "parttable_type": "dos" + }, + { + "FSTYPE": "ext4", + "HCTL": "", + "MODEL": "", + "MOUNTPOINT": "/mnt/resource", + "NAME": "sdb1", + "SERIAL": "", + "SIZE": "4292870144", + "TYPE": "part", + "UUID": "95192b50-0c76-4a03-99a7-67fdc225504f", + "device_name_cloud": "", + "device_name_os": "/dev/sdb1", + "parttable_type": "dos" + }, + { + "FSTYPE": "", + "HCTL": "1:0:0:0", + "MODEL": "Virtual Disk", + "MOUNTPOINT": "", + "NAME": "sdc", + "SERIAL": "60022480b71fde48d1f2212130abc54e", + "SIZE": "1073741824", + "TYPE": "disk", + "UUID": "", + "device_name_cloud": "0", + "device_name_os": "/dev/sdc", + "parttable_type": "" + }, + { + "FSTYPE": "", + "HCTL": "1:0:0:1", + "MODEL": "Virtual Disk", + "MOUNTPOINT": "", + "NAME": "sdd", + "SERIAL": "60022480aa9c0d340c125a5295ee678d", + "SIZE": "1073741824", + "TYPE": "disk", + "UUID": "", + "device_name_cloud": "1", + "device_name_os": "/dev/sdd", + "parttable_type": "" + } +] ''' from ctypes import * from fcntl import ioctl import subprocess +import os import sys import json import re @@ -220,8 +347,8 @@ def __init__(self, module, **kwds): def get_lsblk(self): # Get all 
existing block volumes by key=value, then parse this into a dictionary (which excludes non disk and partition block types, e.g. ram, loop). Cannot use the --json output as it not supported on older versions of lsblk (e.g. CentOS 7) - lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,MOUNTPOINT,MODEL,SERIAL,SIZE', '-P', '-b']).decode().rstrip().split('\n') - os_device_names = [dict((map(lambda x: x.strip("\""), sub.split("="))) for sub in dev.split('\" ') if '=' in sub) for dev in lsblk_devices] + lsblk_devices = subprocess.check_output(['lsblk', '-o', 'NAME,TYPE,UUID,FSTYPE,MOUNTPOINT,MODEL,SERIAL,SIZE,HCTL', '-P', '-b']).decode().rstrip().split('\n') + os_device_names = [dict((map(lambda x: x.strip("\"").rstrip(), sub.split("="))) for sub in dev.split('\" ') if '=' in sub) for dev in lsblk_devices] os_device_names = [dev for dev in os_device_names if dev['TYPE'] in ['disk', 'part', 'lvm']] os_device_names.sort(key=lambda k: k['NAME']) @@ -245,6 +372,26 @@ def __init__(self, **kwds): os_device.update({"device_name_os": "/dev/" + os_device['NAME'], "device_name_cloud": ""}) +class cAzureMapper(cBlockDevMap): + def __init__(self, **kwds): + super(cAzureMapper, self).__init__(**kwds) + + self.device_map = self.get_lsblk() + + # The Azure root and resource disks are symlinked at install time (by cloud-init) to /dev/disk/cloud/azure_[root|resource]. (They are NOT at predictable /dev/sd[a|b] locations) + # Other managed 'azure_datadisk' disks are mapped by udev (/etc/udev/rules.d/66-azure-storage.rules) when attached. + devrootdisk = os.path.basename(os.path.realpath('/dev/disk/cloud/azure_root')) + devresourcedisk = os.path.basename(os.path.realpath('/dev/disk/cloud/azure_resource')) + + for os_device in self.device_map: + os_device.update({"device_name_os": "/dev/" + os_device['NAME']}) + if os_device['NAME'] not in [devrootdisk,devresourcedisk]: + lun = os_device['HCTL'].split(':')[-1] if len(os_device['HCTL']) else "" + os_device.update({"device_name_cloud": lun}) + else: + os_device.update({"device_name_cloud": "ROOTDISK" if os_device['NAME'] in devrootdisk else "RESOURCEDISK"}) + + class cGCPMapper(cBlockDevMap): def __init__(self, **kwds): super(cGCPMapper, self).__init__(**kwds) @@ -324,11 +471,11 @@ def get_block_device(self, stripped=False): def main(): if not (len(sys.argv) > 1 and sys.argv[1] == "console"): - module = AnsibleModule(argument_spec={"cloud_type": {"type": "str", "required": True, "choices": ['aws', 'gcp', 'lsblk']}}, supports_check_mode=True) + module = AnsibleModule(argument_spec={"cloud_type": {"type": "str", "required": True, "choices": ['aws', 'gcp', 'azure', 'lsblk']}}, supports_check_mode=True) else: # For testing without Ansible (e.g on Windows) class cDummyAnsibleModule(): - params = {"cloud_type": "aws"} + params = {"cloud_type": "azure"} def exit_json(self, changed, **kwargs): print(changed, json.dumps(kwargs, sort_keys=True, indent=4, separators=(',', ': '))) @@ -346,6 +493,8 @@ def fail_json(self, msg): blockdevmap = cAwsMapper(module=module) elif module.params['cloud_type'] == 'gcp': blockdevmap = cGCPMapper(module=module) + elif module.params['cloud_type'] == 'azure': + blockdevmap = cAzureMapper(module=module) elif module.params['cloud_type'] == 'lsblk': blockdevmap = cLsblkMapper(module=module) else: diff --git a/_dependencies/library/blockdevmap_LICENSE b/_dependencies/library/blockdevmap_LICENSE index 7d404386..84260345 100644 --- a/_dependencies/library/blockdevmap_LICENSE +++ b/_dependencies/library/blockdevmap_LICENSE @@ 
-3,7 +3,7 @@ BSD 3-Clause License -Copyright (c) 2020, Dougal Seeley +Copyright (c) 2021, Dougal Seeley All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/_dependencies/library/blockdevmap_README.md b/_dependencies/library/blockdevmap_README.md index 330f2888..5ede9fb6 100644 --- a/_dependencies/library/blockdevmap_README.md +++ b/_dependencies/library/blockdevmap_README.md @@ -1,13 +1,23 @@ # blockdevmap -This is an Ansible module that is able to map AWS and GCP device names to the host device names. It returns a dictionary, derived from Linux `lsblk`, (augmented in the case of AWS with results from elsewhere). +This is an Ansible module (or python script) that is able to map AWS, GCP and Azure device names to the host device names. It returns a list of dictionaries (per disk), derived from Linux `lsblk`, (augmented in the case of AWS & Azure with disk information from other machine metadata). + +## Output +### Common ++ `lsblk` is run for options: NAME,TYPE,UUID,FSTYPE,MOUNTPOINT,MODEL,SERIAL,SIZE,HCTL (all of which are supported from CentOS7+ and Ubuntu1804+). ++ _NAME_ is always the OS device name, although in some cases symlinked to a `xvd[\d]` name + + For convenience, this is copied to a parameter `device_name_os` ++ A parameter `device_name_cloud` is created that relates to the name the cloud gives to the device when it is created. ### AWS + On AWS 'nitro' instances all EBS mappings are attached to the NVME controller. The nvme mapping is non-deterministic though, so the script uses ioctl commands to query the nvme controller (from a script by Amazon that is present on 'Amazon Linux' machines: `/sbin/ebsnvme-id`. See documentation: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/nvme-ebs-volumes). -+ For non-nitro EBS mapping, the script enumerates the mapping in the alphanumerical order of the disk device names. This is the correct order except for some very old RHEL/Centos AMIs, which are not supported. ++ For non-nitro EBS mapping, the script enumerates the mapping in the alphanumerical order of the disk device names. This is the correct order except for some very old RHEL/Centos AMIs, which are not supported. + For ephemeral volume mapping, it uses the http://169.254.169.254/latest/meta-data/block-device-mapping/ endpoint. ### GCP -+ GCP device names are user-defined, and appear as entries in the `lsblk` _SERIAL_ column, mapped to the `lsblk` _NAME_ column. ++ GCP device names are user-defined, and appear as entries in the `lsblk` _SERIAL_ column (which is copied for consistency to `device_name_cloud`). + +### Azure ++ Azure LUNs are user-defined, and appear as the last entry in the `lsblk` _HCTL_ column, (which is copied for consistency to `device_name_cloud`). ### lsblk + The script can be run as plain `lsblk` command, where the cloud provider does not include a mapping, and will return the information as a dictionary. For example, the _bytes_ mapped to the _NAME_ field could be cross-checked against the requested disk size to create a mapping. 
@@ -16,15 +26,9 @@ This is an Ansible module that is able to map AWS and GCP device names to the ho ## Execution This can be run as an Ansible module (needs root): ```yaml -- name: Get block device map information for GCP - blockdevmap: - cloud_type: gcp - become: yes - register: r__blockdevmap - -- name: Get block device map information for AWS +- name: Get block device map information for cloud blockdevmap: - cloud_type: aws + cloud_type: become: yes register: r__blockdevmap diff --git a/_dependencies/tasks/main.yml b/_dependencies/tasks/main.yml index 6be99861..14106173 100644 --- a/_dependencies/tasks/main.yml +++ b/_dependencies/tasks/main.yml @@ -52,6 +52,10 @@ - assert: { that: "cluster_vars[buildenv] | json_query(\"hosttype_vars.*.auto_volumes[] | [?contains(`/dev/sdb,/dev/sdc,/dev/sdd,/dev/sde`, device_name) && volume_type!='ephemeral']\") | length == 0", fail_msg: "device_names /dev/sd[b-e] are only allowed for ephemeral volumes in AWS cluster_vars[buildenv].hosttype_vars. Please start non-ephemeral devices at /dev/sdf." } when: cluster_vars.type == "aws" + - assert: + that: "'{%- for hosttype in cluster_vars[buildenv].hosttype_vars | dict2items -%}{%- if ('lvmparams' not in hosttype.value and (hosttype.value.auto_volumes | length) == (hosttype.value.auto_volumes | map(attribute='mountpoint') | list | unique | count)) or ('lvmparams' in hosttype.value and (hosttype.value.auto_volumes | map(attribute='mountpoint') | list | unique | count == 1)) -%}{%- else -%}{{hosttype.key}}{%- endif -%}{%- endfor -%}' == ''" + fail_msg: "All volume mountpoints must either be all different, or all the same (in which case, 'lvmparams' must be set)" + - name: Create gcp service account contents file from cluster_vars[buildenv].gcp_service_account_rawtext (unless already defined by user) block: diff --git a/clean/tasks/aws.yml b/clean/tasks/aws.yml new file mode 100644 index 00000000..aad3fee1 --- /dev/null +++ b/clean/tasks/aws.yml @@ -0,0 +1,36 @@ +--- + +- name: clean/aws | clean vms + block: + - name: clean/aws | Remove instances termination protection + ec2: + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{ cluster_vars.region }}" + state: "{{ item.instance_state }}" + termination_protection: "no" + instance_ids: ["{{ item.instance_id }}"] + with_items: "{{ hosts_to_clean | json_query(\"[].{instance_id:instance_id, instance_state: instance_state}\") | default([]) }}" + + - name: clean/aws | Delete VMs + ec2: + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{ cluster_vars.region }}" + state: "absent" + instance_ids: "{{ hosts_to_clean | json_query(\"[].instance_id\") }}" + wait: true + when: hosts_to_clean | length + + +- name: clean/aws | clean networking (when '-e clean=_all_') + block: + - name: clean/aws | Delete security group + ec2_group: + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{cluster_vars.region}}" + name: "{{ cluster_name }}-sg" + vpc_id: "{{vpc_id}}" + state: absent + when: clean == '_all_' diff --git a/clean/tasks/azure.yml b/clean/tasks/azure.yml new file mode 100644 index 00000000..7d049767 --- /dev/null +++ b/clean/tasks/azure.yml @@ -0,0 +1,146 @@ +--- + +- name: clean/azure | clean vms (and all dependent infrastructure) + block: + - name: clean/azure | Delete VMs (and all attached infra (NIC/IP/Storage)) 
asynchronously + azure.azcollection.azure_rm_virtualmachine: + client_id: "{{cluster_vars[buildenv].azure_client_id}}" + secret: "{{cluster_vars[buildenv].azure_secret}}" + subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" + tenant: "{{cluster_vars[buildenv].azure_tenant}}" + resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" + name: "{{item.name}}" + remove_on_absent: ["all"] + state: absent + register: r__azure_rm_virtualmachine + loop: "{{ hosts_to_clean }}" + async: 7200 + poll: 0 + + - name: clean/azure | Wait for instance deletion to complete + async_status: { jid: "{{ item.ansible_job_id }}" } + register: r__async_status__azure_rm_virtualmachine + until: r__async_status__azure_rm_virtualmachine.finished + delay: 3 + retries: 300 + with_items: "{{r__azure_rm_virtualmachine.results}}" + when: hosts_to_clean | length + + +#### ALTERNATE - IF NOT RELYING ON ANSIBLE-CREATED VMs +#- name: clean/azure | clean vms +# block: +# - name: clean/azure | Get instance resource info +# azure.azcollection.azure_rm_resource_info: +# client_id: "{{cluster_vars[buildenv].azure_client_id}}" +# secret: "{{cluster_vars[buildenv].azure_secret}}" +# subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" +# tenant: "{{cluster_vars[buildenv].azure_tenant}}" +# resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" +# resource_name: "{{ item.name }}" +# resource_type: VirtualMachines +# provider: Compute +# with_items: "{{ hosts_to_clean }}" +# register: r__azure_rm_resource_info__vm +# async: 7200 +# poll: 0 +# +# - name: clean/azure | Wait for instance resource info (to get Zone info) +# async_status: { jid: "{{ item.ansible_job_id }}" } +# register: r__async_status__azure_rm_resource_info__vm +# until: r__async_status__azure_rm_resource_info__vm.finished +# delay: 3 +# retries: 300 +# with_items: "{{r__azure_rm_resource_info__vm.results}}" +# +# +# - name: clean/azure | Delete VMs asynchronously +# azure.azcollection.azure_rm_virtualmachine: +# client_id: "{{cluster_vars[buildenv].azure_client_id}}" +# secret: "{{cluster_vars[buildenv].azure_secret}}" +# subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" +# tenant: "{{cluster_vars[buildenv].azure_tenant}}" +# resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" +# name: "{{item.name}}" +# remove_on_absent: ["all_autocreated"] +# state: absent +# register: r__azure_rm_virtualmachine +# loop: "{{ hosts_to_clean }}" +# async: 7200 +# poll: 0 +# +# - name: clean/azure | Wait for instance deletion to complete +# async_status: { jid: "{{ item.ansible_job_id }}" } +# register: r__async_status__azure_rm_virtualmachine +# until: r__async_status__azure_rm_virtualmachine.finished +# delay: 3 +# retries: 300 +# with_items: "{{r__azure_rm_virtualmachine.results}}" +# +# - name: create/azure | Delete managed disks +# azure.azcollection.azure_rm_manageddisk: +# client_id: "{{cluster_vars[buildenv].azure_client_id}}" +# secret: "{{cluster_vars[buildenv].azure_secret}}" +# subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" +# tenant: "{{cluster_vars[buildenv].azure_tenant}}" +# resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" +# name: "{{ item }}" +# state: absent +# loop: "{{ r__async_status__azure_rm_resource_info__vm.results | json_query(\"[[].response[].properties.storageProfile.dataDisks[].name, [].response[].properties.storageProfile.osDisk.name][]\") }}" +# register: r__aazure_rm_manageddisk +# async: 7200 +# poll: 0 +# +# - name: clean/azure | 
Wait for managed disk deletion +# async_status: { jid: "{{ item.ansible_job_id }}" } +# register: r__async_status__aazure_rm_manageddisk +# until: r__async_status__aazure_rm_manageddisk.finished +# delay: 3 +# retries: 300 +# with_items: "{{r__aazure_rm_manageddisk.results}}" +# +# +#- name: clean/azure | clean networking (when '-e clean=_all_') +# block: +# - name: clean/azure | Get network interface info (per instance) +# azure.azcollection.azure_rm_networkinterface_info: +# client_id: "{{cluster_vars[buildenv].azure_client_id}}" +# secret: "{{cluster_vars[buildenv].azure_secret}}" +# subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" +# tenant: "{{cluster_vars[buildenv].azure_tenant}}" +# resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" +# name: "{{ item | basename }}" +# loop: "{{ r__async_status__azure_rm_resource_info__vm.results | json_query(\"[].response[].properties.networkProfile.networkInterfaces[].id\") }}" +# register: r__azure_rm_networkinterface_info +# async: 7200 +# poll: 0 +# +# - name: clean/azure | Wait for network interface info +# async_status: { jid: "{{ item.ansible_job_id }}" } +# register: r__async_status__azure_rm_networkinterface_info +# until: r__async_status__azure_rm_networkinterface_info.finished +# delay: 3 +# retries: 300 +# with_items: "{{r__azure_rm_networkinterface_info.results}}" +# +# - name: clean/azure | Delete public ipaddresses +# azure.azcollection.azure_rm_publicipaddress: +# client_id: "{{cluster_vars[buildenv].azure_client_id}}" +# secret: "{{cluster_vars[buildenv].azure_secret}}" +# subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" +# tenant: "{{cluster_vars[buildenv].azure_tenant}}" +# resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" +# name: "{{ item.public_ip_id | basename }}" +# with_items: "{{ r__async_status__azure_rm_networkinterface_info.results | json_query(\"[].networkinterfaces[].ip_configurations[].public_ip_address\") }}" +# register: r__azure_rm_networkinterface +# async: 7200 +# poll: 0 +# +# - name: clean/azure | Wait for publicipaddress deletion +# async_status: { jid: "{{ item.ansible_job_id }}" } +# register: r__async_status__azure_rm_publicipaddress +# until: r__async_status__azure_rm_publicipaddress.finished +# delay: 3 +# retries: 300 +# with_items: "{{r__azure_rm_networkinterface.results}}" +# when: clean == '_all_' diff --git a/clean/tasks/clean_networking.yml b/clean/tasks/clean_networking.yml deleted file mode 100644 index 471e1ee5..00000000 --- a/clean/tasks/clean_networking.yml +++ /dev/null @@ -1,31 +0,0 @@ ---- - -- name: clean/networking/aws | Delete AWS security group - ec2_group: - name: "{{ cluster_name }}-sg" - region: "{{cluster_vars.region}}" - vpc_id: "{{vpc_id}}" - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - state: absent - when: cluster_vars.type == "aws" - -- block: - - name: clean/networking/gcp | Delete GCP cluster firewalls - gcp_compute_firewall: - name: "{{ item.name }}" - state: "absent" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - project: "{{cluster_vars[buildenv].vpc_host_project_id}}" - with_items: "{{ cluster_vars.firewall_rules }}" - - - name: clean/networking/gcp | Delete the GCP network (if -e create_gcp_network=true) - gcp_compute_network: - name: "{{cluster_vars[buildenv].vpc_network_name}}" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - project: 
"{{cluster_vars[buildenv].vpc_host_project_id}}" - state: absent - when: create_gcp_network is defined and create_gcp_network|bool - when: cluster_vars.type == "gcp" diff --git a/clean/tasks/clean_vms.yml b/clean/tasks/clean_vms.yml deleted file mode 100644 index aa94f1ce..00000000 --- a/clean/tasks/clean_vms.yml +++ /dev/null @@ -1,89 +0,0 @@ ---- - -- name: clean/del_vms | hosts_to_clean - debug: msg="{{hosts_to_clean}}" - -- block: - - block: - - name: clean/del_vms/aws | Remove EC2 instances termination protection - ec2: - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{ cluster_vars.region }}" - state: "{{ item.instance_state }}" - termination_protection: "no" - instance_ids: ["{{ item.instance_id }}"] - with_items: "{{ hosts_to_clean | json_query(\"[].{instance_id:instance_id, instance_state: instance_state}\") | default([]) }}" - - - name: clean/del_vms/aws | Delete EC2 instances - ec2: - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{ cluster_vars.region }}" - state: "absent" - instance_ids: "{{ hosts_to_clean | json_query(\"[].instance_id\") }}" - wait: true - when: cluster_vars.type == "aws" - - - block: - - name: clean/del_vms/gcp | Remove deletion protection - command: "gcloud compute instances update {{item.name}} --no-deletion-protection --zone {{ item.regionzone }}" - when: cluster_vars[buildenv].deletion_protection | bool - with_items: "{{ hosts_to_clean }}" - -# - name: clean/del_vms/gcp | Remove deletion protection (broken until https://github.com/ansible-collections/ansible_collections_google/pull/163 gets into a release) -# gcp_compute_instance: -# name: "{{item.name}}" -# project: "{{cluster_vars[buildenv].vpc_project_id}}" -# zone: "{{ item.regionzone }}" -# auth_kind: "serviceaccount" -# service_account_file: "{{gcp_credentials_file}}" -# deletion_protection: 'no' -# with_items: "{{ hosts_to_clean }}" - - - name: clean/del_vms/gcp | Delete GCE VM - gcp_compute_instance: - name: "{{item.name}}" - project: "{{cluster_vars[buildenv].vpc_project_id}}" - zone: "{{ item.regionzone }}" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - state: "absent" - with_items: "{{ hosts_to_clean }}" - register: r__gcp_compute_instance - async: 7200 - poll: 0 - - - name: clean/del_vms/gcp | Wait for GCE VM deletion to complete - async_status: - jid: "{{ item.ansible_job_id }}" - register: async_jobs - until: async_jobs.finished - retries: 300 - with_items: "{{r__gcp_compute_instance.results}}" - when: cluster_vars.type == "gcp" - - - block: - - name: clean/del_vms/esxifree | Delete vmware VM - esxifree_guest: - hostname: "{{ cluster_vars.esxi_ip }}" - username: "{{ cluster_vars.username }}" - password: "{{ cluster_vars.password }}" - name: "{{item.name}}" - state: absent - register: esxi_instances - run_once: true - with_items: "{{hosts_to_clean}}" - async: 7200 - poll: 0 - - - name: clean_vms_esxifree | Wait for esxifree VM deletion to complete - async_status: - jid: "{{ item.ansible_job_id }}" - register: esxi_jobs - until: esxi_jobs.finished - retries: 300 - with_items: "{{esxi_instances.results}}" - when: cluster_vars.type == "esxifree" - - when: hosts_to_clean | length \ No newline at end of file diff --git a/clean/tasks/clean_dns.yml b/clean/tasks/dns.yml similarity index 97% rename from clean/tasks/clean_dns.yml rename to clean/tasks/dns.yml index 0de6977a..075e722a 100644 --- 
a/clean/tasks/clean_dns.yml +++ b/clean/tasks/dns.yml @@ -4,7 +4,7 @@ debug: msg="{{hosts_to_clean}}" - block: - - name: clean/dns/nsupdate | Delete DNS entries from bind (nsupdate) + - name: clean/dns/nsupdate | Delete DNS entries block: - name: clean/dns/nsupdate | Delete A records nsupdate: @@ -33,7 +33,7 @@ when: (item.name + '.' + cluster_vars.dns_user_domain + "." == cname_value) when: cluster_vars.dns_server == "nsupdate" - - name: clean/dns/route53 | Delete DNS entries from route53 + - name: clean/dns/route53 | Delete DNS entries block: - name: clean/dns/route53 | Get A records route53: @@ -90,7 +90,7 @@ when: (item.1.set.value is defined) and ((item.0.name | regex_replace('-(?!.*-).*')) == (item.1.set.record | regex_replace('^(.*?)\\..*$', '\\1'))) and (item.0.name == item.1.set.value | regex_replace('^(.*?)\\..*$', '\\1')) when: cluster_vars.dns_server == "route53" - - name: clean/dns/clouddns | Delete DNS entries from clouddns + - name: clean/dns/clouddns | Delete DNS entries block: - name: clean/dns/clouddns | Get managed zone(s) gcp_dns_managed_zone_info: diff --git a/clean/tasks/esxifree.yml b/clean/tasks/esxifree.yml new file mode 100644 index 00000000..ea542172 --- /dev/null +++ b/clean/tasks/esxifree.yml @@ -0,0 +1,25 @@ +--- + +- name: clean/esxifree + block: + - name: clean/esxifree | Delete VM + esxifree_guest: + hostname: "{{ cluster_vars.esxi_ip }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + name: "{{item.name}}" + state: absent + register: esxi_instances + run_once: true + with_items: "{{hosts_to_clean}}" + async: 7200 + poll: 0 + + - name: clean/esxifree | Wait for VM deletion to complete + async_status: + jid: "{{ item.ansible_job_id }}" + register: esxi_jobs + until: esxi_jobs.finished + retries: 300 + with_items: "{{esxi_instances.results}}" + when: hosts_to_clean | length diff --git a/clean/tasks/gcp.yml b/clean/tasks/gcp.yml new file mode 100644 index 00000000..a37910a8 --- /dev/null +++ b/clean/tasks/gcp.yml @@ -0,0 +1,62 @@ +--- + +- name: clean/gcp | clean vms + block: + - name: clean/gcp | Remove deletion protection + command: "gcloud compute instances update {{item.name}} --no-deletion-protection --zone {{ item.regionzone }}" + when: cluster_vars[buildenv].deletion_protection | bool + with_items: "{{ hosts_to_clean }}" + + #- name: clean/gcp | Remove deletion protection (broken until https://github.com/ansible-collections/ansible_collections_google/pull/163 gets into a release) + # gcp_compute_instance: + # name: "{{item.name}}" + # project: "{{cluster_vars[buildenv].vpc_project_id}}" + # zone: "{{ item.regionzone }}" + # auth_kind: "serviceaccount" + # service_account_file: "{{gcp_credentials_file}}" + # deletion_protection: 'no' + # with_items: "{{ hosts_to_clean }}" + + - name: clean/gcp | Delete VMs + gcp_compute_instance: + name: "{{item.name}}" + project: "{{cluster_vars[buildenv].vpc_project_id}}" + zone: "{{ item.regionzone }}" + auth_kind: "serviceaccount" + service_account_file: "{{gcp_credentials_file}}" + state: "absent" + with_items: "{{ hosts_to_clean }}" + register: r__gcp_compute_instance + async: 7200 + poll: 0 + + - name: clean/gcp | Wait for VM deletion to complete + async_status: + jid: "{{ item.ansible_job_id }}" + register: async_jobs + until: async_jobs.finished + retries: 300 + with_items: "{{r__gcp_compute_instance.results}}" + when: hosts_to_clean | length + + +- name: clean/gcp | clean networking (when '-e clean=_all_') + block: + - name: clean/networking/gcp | Delete GCP cluster 
firewalls + gcp_compute_firewall: + name: "{{ item.name }}" + state: "absent" + auth_kind: "serviceaccount" + service_account_file: "{{gcp_credentials_file}}" + project: "{{cluster_vars[buildenv].vpc_host_project_id}}" + with_items: "{{ cluster_vars.firewall_rules }}" + + - name: clean/gcp | Delete the GCP network (if -e create_gcp_network=true) + gcp_compute_network: + name: "{{cluster_vars[buildenv].vpc_network_name}}" + auth_kind: "serviceaccount" + service_account_file: "{{gcp_credentials_file}}" + project: "{{cluster_vars[buildenv].vpc_host_project_id}}" + state: absent + when: create_gcp_network is defined and create_gcp_network|bool + when: clean == '_all_' diff --git a/clean/tasks/main.yml b/clean/tasks/main.yml index 786b8c24..8c265565 100644 --- a/clean/tasks/main.yml +++ b/clean/tasks/main.yml @@ -2,16 +2,15 @@ - name: "Clean the cluster of VMs with lifecycle_state = {{clean}}" block: + - name: clean | hosts_to_clean + debug: msg={{hosts_to_clean}} + - name: clean | Delete DNS - include_tasks: clean_dns.yml + include_tasks: dns.yml when: (cluster_vars.dns_server is defined and cluster_vars.dns_server != "") and (cluster_vars.dns_user_domain is defined and cluster_vars.dns_user_domain != "") - - name: clean | Delete VMs - include_tasks: clean_vms.yml - - - name: clean | Delete Networking - include_tasks: clean_networking.yml - when: clean == '_all_' + - name: "clean | {{cluster_vars.type}}" + include_tasks: "{{cluster_vars.type}}.yml" vars: hosts_to_clean: | {%- if clean == '_all_' -%} diff --git a/cluster_hosts/tasks/get_cluster_hosts_state.yml b/cluster_hosts/tasks/get_cluster_hosts_state.yml deleted file mode 100644 index 84cf440c..00000000 --- a/cluster_hosts/tasks/get_cluster_hosts_state.yml +++ /dev/null @@ -1,103 +0,0 @@ ---- - -- name: get_cluster_hosts_state/aws | Get AWS cluster_hosts_state - block: - - name: get_cluster_hosts_state/aws | Get existing EC2 instance info - ec2_instance_info: - filters: - "tag:cluster_name": "{{cluster_name}}" - "instance-state-name": ["running", "stopped"] - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{cluster_vars.region}}" - register: r__ec2_instance_info - delegate_to: localhost - run_once: true - - - name: get_cluster_hosts_state/aws | Set cluster_hosts_state - set_fact: - cluster_hosts_state: "{{r__ec2_instance_info.instances | json_query(\"[].{name: tags.Name, regionzone: placement.availability_zone, tagslabels: tags, instance_id: instance_id, instance_state: state.name}\") }}" - when: cluster_vars.type == "aws" - -- name: get_cluster_hosts_state/gcp | Get GCP cluster_hosts_state - block: - - name: get_cluster_hosts_state/gcp | Get existing GCE instance info (per AZ) - gcp_compute_instance_info: - zone: "{{cluster_vars.region}}-{{item}}" - filters: - - "labels.cluster_name = {{cluster_name}}" - project: "{{cluster_vars[buildenv].vpc_project_id}}" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - scopes: ["https://www.googleapis.com/auth/compute.readonly"] - with_items: "{{ cluster_vars[buildenv].hosttype_vars | json_query(\"*[vms_by_az][][keys(@)][][]\") | unique }}" - register: r__gcp_compute_instance_info - delegate_to: localhost - run_once: true - - - name: get_cluster_hosts_state/gcp | Set cluster_hosts_state with correct regionzone (remove url) - set_fact: - cluster_hosts_state: | - {% set res = _cluster_hosts_state__urlregion -%} - {%- for cluster_host in res -%} - {%- set _ = 
cluster_host.update({'regionzone': cluster_host.regionzone | regex_replace('^.*/(.*)$', '\\1') }) -%} - {%- endfor -%} - {{ res }} - vars: - _cluster_hosts_state__urlregion: "{{r__gcp_compute_instance_info.results | json_query(\"[?resources[?labels]].resources[].{name: name, regionzone: zone, tagslabels: labels, instance_id: id, instance_state: status}\") }}" - when: cluster_vars.type == "gcp" - -- name: get_cluster_hosts_state_esxifree | Get VMware cluster_hosts_state - block: - - name: get_cluster_hosts_state/esxifree | Get existing VMware instance info - vmware_vm_info: - username: "{{ cluster_vars.username }}" - password: "{{ cluster_vars.password }}" - hostname: "{{ cluster_vars.esxi_ip }}" - validate_certs: no - register: r__vmware_vm_info - delegate_to: localhost - run_once: true - - - name: get_cluster_hosts_state/esxifree | Get existing VMware instance facts - vmware_guest_info: - username: "{{ cluster_vars.username }}" - password: "{{ cluster_vars.password }}" - hostname: "{{ cluster_vars.esxi_ip }}" - validate_certs: no - datacenter: None - uuid: "{{item.uuid}}" - with_items: "{{ r__vmware_vm_info.virtual_machines | to_json | from_json | json_query(\"[?starts_with(guest_name, '\"+cluster_name+\"')]\") }}" - register: r__vmware_guest_info - delegate_to: localhost - run_once: true - - ## esxifree hosts must use the esxi 'annotations' field as json. They are stored as unconventional text in the vmx file, so must - ## be converted into inline-json within the facts. If the annotation field is not convertible to json, then we don't consider this VM part of the cluster. - - name: get_cluster_hosts_state/esxifree | update r__vmware_guest_info result with json-parsed annotations - set_fact: - r__vmware_guest_info: | - {% set res = {'results': []} -%} - {%- for result in r__vmware_guest_info.results -%} - {%- set loadloose_res = result.instance.annotation | json_loads_loose -%} - {%- if loadloose_res | type_debug == 'dict' or loadloose_res | type_debug == 'list' -%} - {%- set _ = result.instance.update({'annotation': loadloose_res}) -%} - {%- set _ = res.results.append(result) -%} - {%- endif -%} - {%- endfor -%} - {{ res }} - - - name: get_cluster_hosts_state/esxifree | Set cluster_hosts_state - set_fact: - cluster_hosts_state: "{{ r__vmware_guest_info.results | json_query(\"[].{name: instance.hw_name, regionzone: None, tagslabels: instance.annotation, instance_id: instance.moid, instance_state: instance.hw_power_status}\") }}" - - when: cluster_vars.type == "esxifree" - - -- name: get_cluster_hosts_state | cluster_hosts_state - debug: msg="{{cluster_hosts_state}}" - delegate_to: localhost - run_once: true - when: cluster_hosts_state is defined - -#- pause: \ No newline at end of file diff --git a/cluster_hosts/tasks/get_cluster_hosts_state_aws.yml b/cluster_hosts/tasks/get_cluster_hosts_state_aws.yml new file mode 100644 index 00000000..63d8bc25 --- /dev/null +++ b/cluster_hosts/tasks/get_cluster_hosts_state_aws.yml @@ -0,0 +1,17 @@ +--- + +- name: get_cluster_hosts_state/aws | Get existing instance info + ec2_instance_info: + filters: + "tag:cluster_name": "{{cluster_name}}" + "instance-state-name": ["running", "stopped"] + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{cluster_vars.region}}" + register: r__ec2_instance_info + delegate_to: localhost + run_once: true + +- name: get_cluster_hosts_state/aws | Set cluster_hosts_state + set_fact: + cluster_hosts_state: "{{r__ec2_instance_info.instances | 
json_query(\"[].{name: tags.Name, regionzone: placement.availability_zone, tagslabels: tags, instance_id: instance_id, instance_state: state.name}\") }}" diff --git a/cluster_hosts/tasks/get_cluster_hosts_state_azure.yml b/cluster_hosts/tasks/get_cluster_hosts_state_azure.yml new file mode 100644 index 00000000..70a29d08 --- /dev/null +++ b/cluster_hosts/tasks/get_cluster_hosts_state_azure.yml @@ -0,0 +1,41 @@ +--- + +- name: get_cluster_hosts_state/azure | Get existing instance info + azure.azcollection.azure_rm_virtualmachine_info: + client_id: "{{cluster_vars[buildenv].azure_client_id}}" + secret: "{{cluster_vars[buildenv].azure_secret}}" + subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" + tenant: "{{cluster_vars[buildenv].azure_tenant}}" + resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" + tags: + - "cluster_name:{{cluster_name}}" + register: r__azure_rm_virtualmachine_info + delegate_to: localhost + run_once: true + +- name: get_cluster_hosts_state/azure | Get zone instance info + azure.azcollection.azure_rm_resource_info: + client_id: "{{cluster_vars[buildenv].azure_client_id}}" + secret: "{{cluster_vars[buildenv].azure_secret}}" + subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" + tenant: "{{cluster_vars[buildenv].azure_tenant}}" + resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" + resource_name: "{{ item.name }}" + resource_type: VirtualMachines + provider: Compute + with_items: "{{ r__azure_rm_virtualmachine_info.vms }}" + register: r__azure_rm_resource_info + until: "(r__azure_rm_resource_info.response | json_query(\"[?properties.provisioningState!='Succeeded']|length(@)\")) == 0" + retries: 18 #3 mins + delay: 10 + delegate_to: localhost + run_once: true + +- name: get_cluster_hosts_state/azure | r__azure_rm_resource_info + debug: msg="{{r__azure_rm_resource_info}}" + delegate_to: localhost + run_once: true + +- name: get_cluster_hosts_state/azure | Set cluster_hosts_state + set_fact: + cluster_hosts_state: "{{r__azure_rm_resource_info.results | json_query(\"[].{name: response[0].name, regionzone: join('-',[response[0].location,response[0].zones[0]]), tagslabels: response[0].tags, instance_id: response[0].id, instance_state: item.power_state}\") }}" diff --git a/cluster_hosts/tasks/get_cluster_hosts_state_esxifree.yml b/cluster_hosts/tasks/get_cluster_hosts_state_esxifree.yml new file mode 100644 index 00000000..e39b2ae8 --- /dev/null +++ b/cluster_hosts/tasks/get_cluster_hosts_state_esxifree.yml @@ -0,0 +1,43 @@ +--- + +- name: get_cluster_hosts_state/esxifree | Get basic instance info of all vms + vmware_vm_info: + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + hostname: "{{ cluster_vars.esxi_ip }}" + validate_certs: no + register: r__vmware_vm_info + delegate_to: localhost + run_once: true + +- name: get_cluster_hosts_state/esxifree | Get detailed instance info of cluster_name VMs + vmware_guest_info: + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + hostname: "{{ cluster_vars.esxi_ip }}" + validate_certs: no + datacenter: None + uuid: "{{item.uuid}}" + with_items: "{{ r__vmware_vm_info.virtual_machines | to_json | from_json | json_query(\"[?starts_with(guest_name, '\"+cluster_name+\"')]\") }}" + register: r__vmware_guest_info + delegate_to: localhost + run_once: true + +## esxifree hosts must use the esxi 'annotations' field as json. 
They are stored as unconventional text in the vmx file, so must +## be converted into inline-json within the facts. If the annotation field is not convertible to json, then we don't consider this VM part of the cluster. +- name: get_cluster_hosts_state/esxifree | update r__vmware_guest_info result with json-parsed annotations + set_fact: + r__vmware_guest_info: | + {% set res = {'results': []} -%} + {%- for result in r__vmware_guest_info.results -%} + {%- set loadloose_res = result.instance.annotation | json_loads_loose -%} + {%- if loadloose_res | type_debug == 'dict' or loadloose_res | type_debug == 'list' -%} + {%- set _ = result.instance.update({'annotation': loadloose_res}) -%} + {%- set _ = res.results.append(result) -%} + {%- endif -%} + {%- endfor -%} + {{ res }} + +- name: get_cluster_hosts_state/esxifree | Set cluster_hosts_state + set_fact: + cluster_hosts_state: "{{ r__vmware_guest_info.results | json_query(\"[].{name: instance.hw_name, regionzone: None, tagslabels: instance.annotation, instance_id: instance.moid, instance_state: instance.hw_power_status}\") }}" diff --git a/cluster_hosts/tasks/get_cluster_hosts_state_gcp.yml b/cluster_hosts/tasks/get_cluster_hosts_state_gcp.yml new file mode 100644 index 00000000..0af1caea --- /dev/null +++ b/cluster_hosts/tasks/get_cluster_hosts_state_gcp.yml @@ -0,0 +1,26 @@ +--- + +- name: get_cluster_hosts_state/gcp | Get existing instance info (per AZ) + gcp_compute_instance_info: + zone: "{{cluster_vars.region}}-{{item}}" + filters: + - "labels.cluster_name = {{cluster_name}}" + project: "{{cluster_vars[buildenv].vpc_project_id}}" + auth_kind: "serviceaccount" + service_account_file: "{{gcp_credentials_file}}" + scopes: ["https://www.googleapis.com/auth/compute.readonly"] + with_items: "{{ cluster_vars[buildenv].hosttype_vars | json_query(\"*[vms_by_az][][keys(@)][][]\") | unique }}" + register: r__gcp_compute_instance_info + delegate_to: localhost + run_once: true + +- name: get_cluster_hosts_state/gcp | Set cluster_hosts_state with correct regionzone (remove url) + set_fact: + cluster_hosts_state: | + {% set res = _cluster_hosts_state__urlregion -%} + {%- for cluster_host in res -%} + {%- set _ = cluster_host.update({'regionzone': cluster_host.regionzone | regex_replace('^.*/(.*)$', '\\1') }) -%} + {%- endfor -%} + {{ res }} + vars: + _cluster_hosts_state__urlregion: "{{r__gcp_compute_instance_info.results | json_query(\"[?resources[?labels]].resources[].{name: name, regionzone: zone, tagslabels: labels, instance_id: id, instance_state: status}\") }}" diff --git a/cluster_hosts/tasks/get_cluster_hosts_target.yml b/cluster_hosts/tasks/get_cluster_hosts_target.yml index 9b5f4782..7fea699f 100644 --- a/cluster_hosts/tasks/get_cluster_hosts_target.yml +++ b/cluster_hosts/tasks/get_cluster_hosts_target.yml @@ -10,7 +10,7 @@ {%- for azcount in range(0,cluster_vars[buildenv].hosttype_vars[hostttype].vms_by_az[azname]|int) -%} {% set _dummy = res.extend([{ 'hosttype': hostttype, - 'hostname': cluster_name + '-' + hostttype + '-' + azname + azcount|string + '-' + cluster_suffix|string, + 'hostname': cluster_name + '-' + hostttype + '-' + azname|string + azcount|string + '-' + cluster_suffix|string, 'az_name': azname|string, 'flavor': cluster_vars[buildenv].hosttype_vars[hostttype].flavor, 'auto_volumes': cluster_vars[buildenv].hosttype_vars[hostttype].auto_volumes @@ -20,115 +20,12 @@ {%- endfor %} {{ res }} -- name: get_cluster_hosts_target/aws | AWS-specific modifications to cluster_hosts_target - add subnets. 
- block: - # Dynamically look up VPC ID by name from aws - - name: get_cluster_hosts_target | Looking up VPC facts to extract ID - ec2_vpc_net_info: - region: "{{ cluster_vars.region }}" - aws_access_key: "{{ cluster_vars[buildenv].aws_access_key }}" - aws_secret_key: "{{ cluster_vars[buildenv].aws_secret_key }}" - filters: - "tag:Name": "{{ cluster_vars[buildenv].vpc_name }}" - register: r__ec2_vpc_net_info - delegate_to: localhost - run_once: true - - name: get_cluster_hosts_target/aws | Set VPC ID in variable - set_fact: - vpc_id: "{{ r__ec2_vpc_net_info.vpcs[0].id }}" +- name: get_cluster_hosts_target | Augment with cloud-specific parameters (if necessary) + include: "{{ item }}" + loop: "{{ query('first_found', params) }}" + vars: { params: { files: ["get_cluster_hosts_target_{{cluster_vars.type}}.yml"], skip: true } } - - name: get_cluster_hosts_target/aws | Look up proxy subnet facts - ec2_vpc_subnet_info: - region: "{{ cluster_vars.region }}" - aws_access_key: "{{ cluster_vars[buildenv].aws_access_key }}" - aws_secret_key: "{{ cluster_vars[buildenv].aws_secret_key }}" - filters: - vpc-id: "{{ vpc_id }}" - register: r__ec2_vpc_subnet_info - delegate_to: localhost - run_once: true - - - name: get_cluster_hosts_target/aws | Update cluster_hosts_target with subnet_ids - set_fact: - cluster_hosts_target: | - {%- for host in cluster_hosts_target -%} - {%- set subnet_id = r__ec2_vpc_subnet_info | to_json | from_json | json_query('subnets[?starts_with(tags.Name, \'' + cluster_vars[buildenv].vpc_subnet_name_prefix + host.az_name +'\')].subnet_id|[0]') -%} - {%- set _dummy = host.update({'vpc_subnet_id': subnet_id | string}) -%} - {%- endfor %} - {{ cluster_hosts_target }} - - - block: - - name: get_cluster_hosts_target/aws | Get snapshots info - ec2_snapshot_info: - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{ cluster_vars.region }}" - filters: "{{ _snapshot_tags[0] }}" - register: r__ebs_snapshots - delegate_to: localhost - run_once: true - - - name: get_cluster_hosts_target/aws | Assert that number of snapshots eq number of hosts - assert: - that: - - _available_snapshots|length == cluster_hosts_target|length - quiet: true - fail_msg: "There are {{ _available_snapshots|length }} available snapshots and {{ cluster_hosts_target|length }} nodes. Snapshot restore available only to the same infrastructure size." 
- vars: - _available_snapshots: "{{ r__ebs_snapshots.snapshots|json_query('[].snapshot_id') }}" - delegate_to: localhost - run_once: true - - ## [ See github.com/ansible/ansible/issues/27299 for reason for '| to_json | from_json' ] - - name: get_cluster_hosts_target/aws | update cluster_hosts_target with snapshot_id - set_fact: - cluster_hosts_target: | - {%- for host in cluster_hosts_target -%} - {%- set cluster_host_topology = host.hostname | regex_replace('^.*(-.*?-).*$', '\\1') -%} - {%- for vol in host.auto_volumes -%} - {%- set cur_snapshot = r__ebs_snapshots | default([]) | to_json | from_json | json_query('snapshots[?contains(tags.Name, \'' + cluster_host_topology + '\')]') -%} - {%- if cur_snapshot and 'snapshot_tags' in vol.keys() -%} - {%- set _dummy = vol.update({'snapshot': cur_snapshot[0].snapshot_id}) -%} - {%- set _dummy = vol.pop('snapshot_tags') -%} - {%- endif %} - {%- endfor %} - {%- endfor %} - {{ cluster_hosts_target }} - vars: - _snapshot_tags: "{{ cluster_vars[buildenv].hosttype_vars|json_query('*.auto_volumes[].snapshot_tags') }}" - when: _snapshot_tags|length > 0 - when: cluster_vars.type == "aws" - - -- name: get_cluster_hosts_target/gcp | GCP-specific modifications to cluster_hosts_target - block: - - name: get_cluster_hosts_target/gcp | Update cluster_hosts_target auto_volumes with device_name and initialize_params - set_fact: - cluster_hosts_target: |- - {%- for host in cluster_hosts_target -%} - {%- for vol in host.auto_volumes -%} - {%- if 'device_name' not in vol -%} - {%- set _dummy = vol.update({'device_name': host.hostname + '--' + vol.mountpoint | basename }) -%} - {%- set _dummy = vol.update({'initialize_params': {'disk_name': vol.device_name, 'disk_size_gb': vol.volume_size}}) -%} - {%- endif -%} - {%- endfor %} - {%- endfor %} - {{ cluster_hosts_target }} - when: cluster_vars.type == "gcp" - -- name: get_cluster_hosts_target/esxifree | esxifree-specific modifications to cluster_hosts_target - block: - - name: get_cluster_hosts_target/esxifree | Update cluster_hosts_target with volname (derived from the mountpoint) - set_fact: - cluster_hosts_target: | - {%- for host in cluster_hosts_target -%} - {%- for hostvol in host.auto_volumes -%} - {%- set _dummy = hostvol.update({'volname': hostvol.mountpoint | regex_replace('.*\/(.*)', '\\1')}) -%} - {%- endfor %} - {%- endfor %} - {{ cluster_hosts_target }} - when: cluster_vars.type == "esxifree" - name: get_cluster_hosts_target | cluster_hosts_target debug: msg={{cluster_hosts_target}} diff --git a/cluster_hosts/tasks/get_cluster_hosts_target_aws.yml b/cluster_hosts/tasks/get_cluster_hosts_target_aws.yml new file mode 100644 index 00000000..c61f35d4 --- /dev/null +++ b/cluster_hosts/tasks/get_cluster_hosts_target_aws.yml @@ -0,0 +1,78 @@ +--- + +# Dynamically look up VPC ID by name from aws +- name: get_cluster_hosts_target | Looking up VPC facts to extract ID + ec2_vpc_net_info: + region: "{{ cluster_vars.region }}" + aws_access_key: "{{ cluster_vars[buildenv].aws_access_key }}" + aws_secret_key: "{{ cluster_vars[buildenv].aws_secret_key }}" + filters: + "tag:Name": "{{ cluster_vars[buildenv].vpc_name }}" + register: r__ec2_vpc_net_info + delegate_to: localhost + run_once: true + +- name: get_cluster_hosts_target/aws | Set VPC ID in variable + set_fact: + vpc_id: "{{ r__ec2_vpc_net_info.vpcs[0].id }}" + +- name: get_cluster_hosts_target/aws | Look up proxy subnet facts + ec2_vpc_subnet_info: + region: "{{ cluster_vars.region }}" + aws_access_key: "{{ cluster_vars[buildenv].aws_access_key }}" + 
aws_secret_key: "{{ cluster_vars[buildenv].aws_secret_key }}" + filters: + vpc-id: "{{ vpc_id }}" + register: r__ec2_vpc_subnet_info + delegate_to: localhost + run_once: true + +- name: get_cluster_hosts_target/aws | Update cluster_hosts_target with subnet_ids + set_fact: + cluster_hosts_target: | + {%- for host in cluster_hosts_target -%} + {%- set subnet_id = r__ec2_vpc_subnet_info | to_json | from_json | json_query('subnets[?starts_with(tags.Name, \'' + cluster_vars[buildenv].vpc_subnet_name_prefix + host.az_name +'\')].subnet_id|[0]') -%} + {%- set _dummy = host.update({'vpc_subnet_id': subnet_id | string}) -%} + {%- endfor %} + {{ cluster_hosts_target }} + +- block: + - name: get_cluster_hosts_target/aws | Get snapshots info + ec2_snapshot_info: + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{ cluster_vars.region }}" + filters: "{{ _snapshot_tags[0] }}" + register: r__ebs_snapshots + delegate_to: localhost + run_once: true + + - name: get_cluster_hosts_target/aws | Assert that number of snapshots eq number of hosts + assert: + that: + - _available_snapshots|length == cluster_hosts_target|length + quiet: true + fail_msg: "There are {{ _available_snapshots|length }} available snapshots and {{ cluster_hosts_target|length }} nodes. Snapshot restore available only to the same infrastructure size." + vars: + _available_snapshots: "{{ r__ebs_snapshots.snapshots|json_query('[].snapshot_id') }}" + delegate_to: localhost + run_once: true + + ## [ See github.com/ansible/ansible/issues/27299 for reason for '| to_json | from_json' ] + - name: get_cluster_hosts_target/aws | update cluster_hosts_target with snapshot_id + set_fact: + cluster_hosts_target: | + {%- for host in cluster_hosts_target -%} + {%- set cluster_host_topology = host.hostname | regex_replace('^.*(-.*?-).*$', '\\1') -%} + {%- for vol in host.auto_volumes -%} + {%- set cur_snapshot = r__ebs_snapshots | default([]) | to_json | from_json | json_query('snapshots[?contains(tags.Name, \'' + cluster_host_topology + '\')]') -%} + {%- if cur_snapshot and 'snapshot_tags' in vol.keys() -%} + {%- set _dummy = vol.update({'snapshot': cur_snapshot[0].snapshot_id}) -%} + {%- set _dummy = vol.pop('snapshot_tags') -%} + {%- endif %} + {%- endfor %} + {%- endfor %} + {{ cluster_hosts_target }} + vars: + _snapshot_tags: "{{ cluster_vars[buildenv].hosttype_vars|json_query('*.auto_volumes[].snapshot_tags') }}" + when: _snapshot_tags|length > 0 diff --git a/cluster_hosts/tasks/get_cluster_hosts_target_esxifree.yml b/cluster_hosts/tasks/get_cluster_hosts_target_esxifree.yml new file mode 100644 index 00000000..871c9a44 --- /dev/null +++ b/cluster_hosts/tasks/get_cluster_hosts_target_esxifree.yml @@ -0,0 +1,11 @@ +--- + +- name: get_cluster_hosts_target/esxifree | Update cluster_hosts_target with volname (derived from the mountpoint) + set_fact: + cluster_hosts_target: | + {%- for host in cluster_hosts_target -%} + {%- for hostvol in host.auto_volumes -%} + {%- set _dummy = hostvol.update({'volname': hostvol.mountpoint | regex_replace('.*\/(.*)', '\\1')}) -%} + {%- endfor %} + {%- endfor %} + {{ cluster_hosts_target }} diff --git a/cluster_hosts/tasks/get_cluster_hosts_target_gcp.yml b/cluster_hosts/tasks/get_cluster_hosts_target_gcp.yml new file mode 100644 index 00000000..a7494e88 --- /dev/null +++ b/cluster_hosts/tasks/get_cluster_hosts_target_gcp.yml @@ -0,0 +1,14 @@ +--- + +- name: get_cluster_hosts_target/gcp | Update cluster_hosts_target auto_volumes with 
device_name and initialize_params + set_fact: + cluster_hosts_target: |- + {%- for host in cluster_hosts_target -%} + {%- for vol in host.auto_volumes -%} + {%- if 'device_name' not in vol -%} + {%- set _dummy = vol.update({'device_name': host.hostname + '--' + vol.mountpoint | basename }) -%} + {%- set _dummy = vol.update({'initialize_params': {'disk_name': vol.device_name, 'disk_size_gb': vol.volume_size}}) -%} + {%- endif -%} + {%- endfor %} + {%- endfor %} + {{ cluster_hosts_target }} diff --git a/cluster_hosts/tasks/main.yml b/cluster_hosts/tasks/main.yml index 1e5c95fe..02f6f6db 100644 --- a/cluster_hosts/tasks/main.yml +++ b/cluster_hosts/tasks/main.yml @@ -1,7 +1,13 @@ --- - name: Get the state of the VMs in the cluster - include_tasks: get_cluster_hosts_state.yml + include_tasks: "get_cluster_hosts_state_{{cluster_vars.type}}.yml" + +- name: get_cluster_hosts_state | cluster_hosts_state + debug: msg="{{cluster_hosts_state}}" + delegate_to: localhost + run_once: true + when: cluster_hosts_state is defined - assert: that: diff --git a/config/tasks/disks_auto_aws_gcp.yml b/config/tasks/disks_auto_aws_gcp.yml deleted file mode 100644 index 2186852d..00000000 --- a/config/tasks/disks_auto_aws_gcp.yml +++ /dev/null @@ -1,154 +0,0 @@ ---- - -- name: disks_auto_aws_gcp | cluster_hosts_target(inventory_hostname) - debug: msg={{ cluster_hosts_target | json_query(\"[?hostname == `\" + inventory_hostname + \"`] \") }} - -- name: disks_auto_aws_gcp | Mount block devices as individual disks - block: - - name: disks_auto_aws_gcp | auto_vols - debug: msg={{ auto_vols }} - - - name: disks_auto_aws_gcp | Get the block device information (pre-filesystem create) - blockdevmap: - cloud_type: "{{cluster_vars.type}}" - become: yes - register: r__blockdevmap - - - name: disks_auto_aws_gcp | r__blockdevmap (pre-filesystem create) - debug: msg={{r__blockdevmap}} - - - name: disks_auto_aws_gcp | Create filesystem (partitionless) - become: yes - filesystem: - fstype: "{{ item.fstype }}" - dev: "{{ _dev }}" - loop: "{{auto_vols}}" - vars: - _dev: "{{ r__blockdevmap.device_map | json_query(\"[?device_name_cloud == `\" + item.device_name + \"` && TYPE==`disk` && parttable_type==`` && FSTYPE==`` && MOUNTPOINT==``].device_name_os | [0]\") }}" - when: _dev is defined and _dev != '' - - - name: disks_auto_aws_gcp | Get the block device information (post-filesystem create), to get the block IDs for mounting - blockdevmap: - cloud_type: "{{cluster_vars.type}}" - become: yes - register: r__blockdevmap - - - name: disks_auto_aws_gcp | r__blockdevmap (post-filesystem create) - debug: msg={{r__blockdevmap}} - - - name: disks_auto_aws_gcp | Mount created filesytem(s) persistently - become: yes - mount: - path: "{{ item.mountpoint }}" - src: "UUID={{ _UUID }}" - fstype: "{{ item.fstype }}" - state: mounted - opts: _netdev - loop: "{{auto_vols}}" - vars: - _UUID: "{{ r__blockdevmap.device_map | json_query(\"[?device_name_cloud == `\" + item.device_name + \"` && TYPE==`disk` && parttable_type==`` && MOUNTPOINT==``].UUID | [0]\") }}" - when: _UUID is defined and _UUID != '' - - - name: disks_auto_aws_gcp | change ownership of mountpoint (if set) - become: yes - file: - path: "{{ item.mountpoint }}" - state: directory - mode: "{{ item.perms.mode | default(omit)}}" - owner: "{{ item.perms.owner | default(omit)}}" - group: "{{ item.perms.group | default(omit)}}" - loop: "{{auto_vols}}" - - - block: - - name: disks_auto_aws_gcp | Touch a file with the mountpoint and device name for testing that disk attachment is correct - 
become: yes - file: - path: "{{item.mountpoint}}/__clusterversetest_{{ item.mountpoint | regex_replace('\\/', '_') }}_{{ item.device_name | regex_replace('\/', '_') }}" - state: touch - loop: "{{auto_vols}}" - - - name: disks_auto_aws_gcp | Find all __clusterversetest_ files in newly mounted disks - find: - paths: "{{item.mountpoint}}" - patterns: "__clusterversetest_*" - loop: "{{auto_vols}}" - register: r__find_test - - - name: disks_auto_aws_gcp | Display all __clusterversetest_ files in newly mounted disks. - debug: - msg: "{{ r__find_test | json_query(\"results[].{device_name: item.device_name, mountpoint: item.mountpoint, files: files[].path}\") }}" - when: test_touch_disks is defined and test_touch_disks|bool - when: (auto_vols | map(attribute='mountpoint') | list | unique | count == auto_vols | map(attribute='mountpoint') | list | count) - vars: - auto_vols: "{{ cluster_hosts_target | json_query(\"[?hostname == `\" + inventory_hostname + \"`].auto_volumes[]\") }}" - - -# The following block mounts all attached volumes that have a single, common mountpoint, by creating a logical volume -- name: disks_auto_aws_gcp | Mount block devices in a single LVM mountpoint through LV/VG - block: - - name: disks_auto_aws_gcp | Install logical volume management tooling. (yum - RedHat/CentOS) - become: true - yum: - name: "lvm*" - state: present - when: ansible_os_family == 'RedHat' - - - name: disks_auto_aws_gcp | Get the device information (pre-filesystem create) - blockdevmap: - become: yes - register: r__blockdevmap - - - name: disks_auto_aws_gcp | r__blockdevmap (pre-filesystem create) - debug: msg={{r__blockdevmap}} - - - name: disks_auto_aws_gcp | Create a volume group from all block devices - become: yes - lvg: - vg: "{{ hosttype_vars.lvmparams.vg_name }}" - pvs: "{{ r__blockdevmap.device_map | json_query(\"[?device_name_cloud && contains('\" + auto_vol_device_names + \"', device_name_cloud)].device_name_os\") | join(',')}}" - vars: - auto_vol_device_names: "{{hosttype_vars.auto_volumes | map(attribute='device_name') | sort | join(',')}}" - - - name: disks_auto_aws_gcp | Create a logical volume from volume group - become: yes - lvol: - vg: "{{ hosttype_vars.lvmparams.vg_name }}" - lv: "{{ hosttype_vars.lvmparams.lv_name }}" - size: "{{ hosttype_vars.lvmparams.lv_size }}" - - - name: disks_auto_aws_gcp | Create filesystem(s) on attached volume(s) - become: yes - filesystem: - fstype: "{{ hosttype_vars.auto_volumes[0].fstype }}" - dev: "/dev/{{ hosttype_vars.lvmparams.vg_name }}/{{ hosttype_vars.lvmparams.lv_name }}" - force: no - - - name: disks_auto_aws_gcp | Mount created filesytem(s) persistently - become: yes - mount: - path: "{{ hosttype_vars.auto_volumes[0].mountpoint }}" - src: "/dev/{{ hosttype_vars.lvmparams.vg_name }}/{{ hosttype_vars.lvmparams.lv_name }}" - fstype: "{{ hosttype_vars.auto_volumes[0].fstype }}" - state: mounted - opts: _netdev - - - block: - - name: disks_auto_aws_gcp | Touch a file with the mountpoint and device name for testing that disk attachment is correct - become: yes - file: - path: "{{ hosttype_vars.auto_volumes[0].mountpoint }}/__clusterversetest_{{ hosttype_vars.auto_volumes[0].mountpoint | regex_replace('\\/', '_') }}" - state: touch - - - name: disks_auto_aws_gcp | Find all __clusterversetest_ files in newly mounted disks - find: - paths: "{{ hosttype_vars.auto_volumes[0].mountpoint }}" - patterns: "__clusterversetest_*" - register: r__find_test - - - name: disks_auto_aws_gcp | Display all __clusterversetest_ files in newly mounted disks. 
- debug: - msg: "{{ r__find_test | json_query(\"files[].path\") }}" - when: test_touch_disks is defined and test_touch_disks|bool - when: ('lvmparams' in hosttype_vars) and (hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | unique | count == 1) and (hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | count >= 2) and (hosttype_vars.auto_volumes | map(attribute='fstype') | list | unique | count == 1) - vars: - hosttype_vars: "{{ cluster_hosts_target | json_query(\"[?hostname == `\" + inventory_hostname + \"`]\") }}" diff --git a/config/tasks/disks_auto_aws_gcp_azure.yml b/config/tasks/disks_auto_aws_gcp_azure.yml new file mode 100644 index 00000000..ccdfe2b4 --- /dev/null +++ b/config/tasks/disks_auto_aws_gcp_azure.yml @@ -0,0 +1,173 @@ +--- + +- name: disks_auto_aws_gcp_azure | cluster_hosts_target(inventory_hostname) + debug: msg={{ cluster_hosts_target | json_query(\"[?hostname == '\" + inventory_hostname + \"'] \") }} + +- name: disks_auto_aws_gcp_azure | Mount block devices as individual disks + block: + - name: disks_auto_aws_gcp_azure | auto_vols + debug: msg={{ auto_vols }} + + - name: disks_auto_aws_gcp_azure | Get the block device information (pre-filesystem create) + blockdevmap: + cloud_type: "{{cluster_vars.type}}" + become: yes + register: r__blockdevmap + + - name: disks_auto_aws_gcp_azure | r__blockdevmap (pre-filesystem create) + debug: msg={{r__blockdevmap}} + + - name: disks_auto_aws_gcp_azure | Create filesystem (partitionless) + become: yes + filesystem: + fstype: "{{ item.fstype }}" + dev: "{{ _dev }}" + loop: "{{auto_vols}}" + vars: + _dev: "{{ r__blockdevmap.device_map | json_query(\"[?device_name_cloud == '\" + item.device_name + \"' && TYPE=='disk' && parttable_type=='' && FSTYPE=='' && MOUNTPOINT==''].device_name_os | [0]\") }}" + when: _dev is defined and _dev != '' + + - name: disks_auto_aws_gcp_azure | Get the block device information (post-filesystem create), to get the block IDs for mounting + blockdevmap: + cloud_type: "{{cluster_vars.type}}" + become: yes + register: r__blockdevmap + + - name: disks_auto_aws_gcp_azure | r__blockdevmap (post-filesystem create) + debug: msg={{r__blockdevmap}} + + - name: disks_auto_aws_gcp_azure | Mount created filesytem(s) persistently + become: yes + mount: + path: "{{ item.mountpoint }}" + src: "UUID={{ _UUID }}" + fstype: "{{ item.fstype }}" + state: mounted + opts: _netdev + loop: "{{auto_vols}}" + vars: + _UUID: "{{ r__blockdevmap.device_map | json_query(\"[?device_name_cloud == '\" + item.device_name + \"' && TYPE=='disk' && parttable_type=='' && MOUNTPOINT==''].UUID | [0]\") }}" + when: _UUID is defined and _UUID != '' + + - name: disks_auto_aws_gcp_azure | change ownership of mountpoint (if set) + become: yes + file: + path: "{{ item.mountpoint }}" + state: directory + mode: "{{ item.perms.mode | default(omit)}}" + owner: "{{ item.perms.owner | default(omit)}}" + group: "{{ item.perms.group | default(omit)}}" + loop: "{{auto_vols}}" + + - name: disks_auto_aws_gcp_azure | Check that we haven't mounted disks in the wrong place. Especially useful for redeploys when we're moving disks. + block: + - name: "disks_auto_aws_gcp_azure | Touch a file with the mountpoint and device name for testing that disk attachment is correct. Note: Use a unique filename here instead of writing to a file, so that more than one file per device is an error." 
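      # Illustration with assumed values: a host "myapp-dev-sys-a0-<suffix>" with device_name "sdf" mounted at "/var/lib/data"
      # would be touched roughly as /var/lib/data/.clusterversetest__myapp-dev-sys-a0__<mountpoint-with-underscores>__sdf,
      # so the find/assert tasks below can verify that exactly one such marker file exists per device.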
+ become: yes + file: + path: "{{item.mountpoint}}/.clusterversetest__{{inventory_hostname | regex_replace('-(?!.*-).*')}}__{{ item.mountpoint | regex_replace('\\/', '_') }}__{{ item.device_name | regex_replace('\/', '_') }}" + state: touch + loop: "{{auto_vols}}" + + - name: disks_auto_aws_gcp_azure | Find all .clusterversetest__ files in mounted disks + find: + paths: "{{item.mountpoint}}" + hidden: yes + patterns: ".clusterversetest__*" + loop: "{{auto_vols}}" + register: r__find_test + + - name: disks_auto_aws_gcp_azure | Check that there is only one .clusterversetest__ file per device in mounted disks. + block: + - name: disks_auto_aws_gcp_azure | testdevicedescriptor + debug: msg={{testdevicedescriptor}} + + - name: disks_auto_aws_gcp_azure | assert that only one device descriptor file exists per disk (otherwise, indicates that this run has mapped either more than one device per mount, or a different one to previous) + assert: { that: "testdevicedescriptor | json_query(\"[?length(files) > `1`]\") | length == 0", fail_msg: "ERROR - only a single file should exist per storage device. In error [{{testdevicedescriptor | json_query(\"[?length(files) > `1`]\")}}]" } + vars: + testdevicedescriptor: "{{ r__find_test | json_query(\"results[].{hostname: '\" + inventory_hostname + \"', device_name: item.device_name, mountpoint: item.mountpoint, files: files[].path}\") }}" + when: test_touch_disks is defined and test_touch_disks|bool + when: (auto_vols | map(attribute='mountpoint') | list | unique | count == auto_vols | map(attribute='mountpoint') | list | count) + vars: + auto_vols: "{{ cluster_hosts_target | json_query(\"[?hostname == '\" + inventory_hostname + \"'].auto_volumes[]\") }}" + + +# The following block mounts all attached volumes that have a single, common mountpoint, by creating a logical volume +- name: disks_auto_aws_gcp_azure/lvm | Mount block devices in a single LVM mountpoint through LV/VG + block: + - name: disks_auto_aws_gcp_azure/lvm | hosttype_vars + debug: msg={{ hosttype_vars }} + + - name: disks_auto_aws_gcp_azure/lvm | Install logical volume management tooling. 
(yum - RedHat/CentOS) + become: true + yum: + name: "lvm*" + state: present + when: ansible_os_family == 'RedHat' + + - name: disks_auto_aws_gcp_azure/lvm | Get the device information (pre-filesystem create) + blockdevmap: + become: yes + register: r__blockdevmap + + - name: disks_auto_aws_gcp_azure/lvm | r__blockdevmap (pre-filesystem create) + debug: msg={{r__blockdevmap}} + + - name: disks_auto_aws_gcp_azure/lvm | Create a volume group from all block devices + become: yes + lvg: + vg: "{{ hosttype_vars.lvmparams.vg_name }}" + pvs: "{{ r__blockdevmap.device_map | json_query(\"[?device_name_cloud && contains('\" + auto_vol_device_names + \"', device_name_cloud)].device_name_os\") | join(',')}}" + vars: + auto_vol_device_names: "{{hosttype_vars.auto_volumes | map(attribute='device_name') | sort | join(',')}}" + + - name: disks_auto_aws_gcp_azure/lvm | Create a logical volume from volume group + become: yes + lvol: + vg: "{{ hosttype_vars.lvmparams.vg_name }}" + lv: "{{ hosttype_vars.lvmparams.lv_name }}" + size: "{{ hosttype_vars.lvmparams.lv_size }}" + + - name: disks_auto_aws_gcp_azure/lvm | Create filesystem(s) on attached volume(s) + become: yes + filesystem: + fstype: "{{ hosttype_vars.auto_volumes[0].fstype }}" + dev: "/dev/{{ hosttype_vars.lvmparams.vg_name }}/{{ hosttype_vars.lvmparams.lv_name }}" + force: no + + - name: disks_auto_aws_gcp_azure/lvm | Mount created filesytem(s) persistently + become: yes + mount: + path: "{{ hosttype_vars.auto_volumes[0].mountpoint }}" + src: "/dev/{{ hosttype_vars.lvmparams.vg_name }}/{{ hosttype_vars.lvmparams.lv_name }}" + fstype: "{{ hosttype_vars.auto_volumes[0].fstype }}" + state: mounted + opts: _netdev + + - name: disks_auto_aws_gcp_azure/lvm | Check that we haven't mounted disks in the wrong place. Especially useful for redeploys when we're moving disks. + block: + - name: "disks_auto_aws_gcp_azure/lvm | Touch a file with the mountpoint for testing that disk attachment is correct. Note: Use a unique filename here instead of writing to a file, so that more than one file per device is an error." + become: yes + file: + path: "{{ hosttype_vars.auto_volumes[0].mountpoint }}/.clusterversetest__{{inventory_hostname | regex_replace('-(?!.*-).*')}}__{{ hosttype_vars.auto_volumes[0].mountpoint | regex_replace('\\/', '_') }}" + state: touch + + - name: disks_auto_aws_gcp_azure/lvm | Find all .clusterversetest__ files in mounted disks + find: + paths: "{{ hosttype_vars.auto_volumes[0].mountpoint }}" + hidden: yes + patterns: ".clusterversetest__*" + register: r__find_test + + - name: disks_auto_aws_gcp_azure/lvm | Check that there is only one .clusterversetest__ file per device in mounted disks. + block: + - name: disks_auto_aws_gcp_azure/lvm | testdevicedescriptor + debug: msg={{testdevicedescriptor}} + + - name: disks_auto_aws_gcp_azure/lvm | assert that only one device descriptor file exists per disk (otherwise, indicates that this run has mapped either more than one device per mount, or a different one to previous) + assert: { that: "testdevicedescriptor | json_query(\"[?length(files) > `1`]\") | length == 0", fail_msg: "ERROR - only a single file should exist per storage device. 
In error [{{testdevicedescriptor | json_query(\"[?length(files) > `1`]\")}}]" } + vars: + testdevicedescriptor: "{{ r__find_test | json_query(\"results[].{hostname: '\" + inventory_hostname + \"', device_name: item.device_name, mountpoint: item.mountpoint, files: files[].path}\") }}" + when: test_touch_disks is defined and test_touch_disks|bool + when: ('lvmparams' in hosttype_vars) and (hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | unique | count == 1) and (hosttype_vars.auto_volumes | map(attribute='mountpoint') | list | count >= 2) and (hosttype_vars.auto_volumes | map(attribute='fstype') | list | unique | count == 1) + vars: + hosttype_vars: "{{ cluster_hosts_target | json_query(\"[?hostname == '\" + inventory_hostname + \"']\") }}" diff --git a/config/tasks/main.yml b/config/tasks/main.yml index b99cc683..1aab6595 100644 --- a/config/tasks/main.yml +++ b/config/tasks/main.yml @@ -52,13 +52,13 @@ mode: 0755 when: (static_journal is defined and static_journal|bool) -- name: Create partition table, format and attach volumes - AWS or GCP - include_tasks: disks_auto_aws_gcp.yml - when: cluster_vars.type == "aws" or cluster_vars.type == "gcp" +- name: Create partition table, format and attach volumes - AWS, GCP or Azure + include_tasks: disks_auto_aws_gcp_azure.yml + when: cluster_vars.type == "aws" or cluster_vars.type == "gcp" or cluster_vars.type == "azure" - name: Create partition table, format and attach volumes - generic include_tasks: disks_auto_generic.yml - when: cluster_vars.type != "aws" and cluster_vars.type != "gcp" + when: cluster_vars.type != "aws" and cluster_vars.type != "gcp" and cluster_vars.type != "azure" - name: install prometheus node exporter daemon include_tasks: prometheus_node_exporter.yml diff --git a/create/tasks/aws.yml b/create/tasks/aws.yml index 77a3ee2e..7071d6cf 100644 --- a/create/tasks/aws.yml +++ b/create/tasks/aws.yml @@ -3,7 +3,7 @@ - name: cluster_hosts_target_denormalised_by_volume debug: msg="{{cluster_hosts_target_denormalised_by_volume}}" -- name: create/aws | Create AWS security group +- name: create/aws | Create security groups ec2_group: name: "{{ cluster_name }}-sg" description: "{{ cluster_name }} rules" diff --git a/create/tasks/azure.yml b/create/tasks/azure.yml new file mode 100644 index 00000000..6f9cdae4 --- /dev/null +++ b/create/tasks/azure.yml @@ -0,0 +1,194 @@ +--- + +- name: cluster_hosts_target_denormalised_by_volume + debug: msg="{{cluster_hosts_target_denormalised_by_volume}}" + +#- name: create/azure | Create storage account (must be [a-z0-9] and <= 24 chars). NOT NECESSARY for IaaS block storage +# azure.azcollection.azure_rm_storageaccount: +# client_id: "{{cluster_vars[buildenv].azure_client_id}}" +# secret: "{{cluster_vars[buildenv].azure_secret}}" +# subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" +# tenant: "{{cluster_vars[buildenv].azure_tenant}}" +# resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" +# name: "{{ (cluster_name | regex_replace('[^a-z0-9]', ''))[:24] }}" +## name: "{{ cluster_suffix }}" +## name: "{{ (item.hostname|hash('md5'))[:24] }}" +# account_type: Standard_LRS +# register: r__azure_rm_storageaccount + + +#### NOTE: +# - Normally, to create an Azure VM, we would create a security group (azure_rm_securitygroup) and if needed, a public IP address (azure_rm_publicipaddress), then attach them +# to a NIC (azure_rm_networkinterface). We would pass this NIC to the VM creation plugin (azure_rm_virtualmachine) in the network_interface_names parameter. 
+# - Unfortunately, the azure_rm_publicipaddress and azure_rm_networkinterface are not Availability-Zone aware, so when we create the VM (in a specific AZ), the IP is not in +# that zone, so the build fails. +# - The alternative is to build a VM without network_interface_names set. This causes the VM to be built with default public IP and security groups, so we need to change them +# afterwards instead. +#### + +#- name: create/azure | Create security groups +# azure.azcollection.azure_rm_securitygroup: +# client_id: "{{cluster_vars[buildenv].azure_client_id}}" +# secret: "{{cluster_vars[buildenv].azure_secret}}" +# subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" +# tenant: "{{cluster_vars[buildenv].azure_tenant}}" +# resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" +# name: "{{ cluster_name }}" +# tags: +# env: "{{ buildenv }}" +# rules: "{{ cluster_vars.rules }}" +# register: r__azure_rm_securitygroup +# when: cluster_vars.rules | length > 0 +# +#- name: create/azure | r__azure_rm_securitygroup +# debug: msg={{r__azure_rm_securitygroup}} + +#- name: create/azure | Create a public ip address +# azure.azcollection.azure_rm_publicipaddress: +# client_id: "{{cluster_vars[buildenv].azure_client_id}}" +# secret: "{{cluster_vars[buildenv].azure_secret}}" +# subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" +# tenant: "{{cluster_vars[buildenv].azure_tenant}}" +# resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" +# name: "{{item.hostname}}" +# allocation_method: static +## zones: ["{{item.az_name}}"] +# register: r__azure_rm_publicipaddress +# loop: "{{ cluster_hosts_target }}" +# +#- name: create/azure | r__azure_rm_publicipaddress +# debug: msg={{r__azure_rm_publicipaddress}} + +#- name: Create NIC +# azure_rm_networkinterface: +# client_id: "{{cluster_vars[buildenv].azure_client_id}}" +# secret: "{{cluster_vars[buildenv].azure_secret}}" +# subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" +# tenant: "{{cluster_vars[buildenv].azure_tenant}}" +# resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" +# name: "{{item.hostname}}" +# virtual_network: "{{cluster_vars[buildenv].vnet_name}}" +# subnet: "{{cluster_vars[buildenv].vpc_subnet_name_prefix}}" +# ip_configurations: +# - name: "{{item.hostname}}-config" +# public_ip_address_name: "{{item.hostname}}-publicip" +# primary: True +# security_group: "{{r__azure_rm_securitygroup.state.name}}" +# register: r__azure_rm_networkinterface +# loop: "{{ cluster_hosts_target }}" +# +#- name: create/azure | r__azure_rm_networkinterface +# debug: msg={{r__azure_rm_networkinterface}} + + +- name: create/azure | Create VMs asynchronously and wait for completion + block: + - name: create/azure | Detach volumes from previous instances (during the _scheme_rmvm_keepdisk_rollback redeploy, we only redeploy one host at a time, and it is already powered off) + azure.azcollection.azure_rm_manageddisk: + client_id: "{{cluster_vars[buildenv].azure_client_id}}" + secret: "{{cluster_vars[buildenv].azure_secret}}" + subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" + tenant: "{{cluster_vars[buildenv].azure_tenant}}" + resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" + name: "{{item.auto_volume.src.volume_id | basename}}" + managed_by: '' + loop: "{{ cluster_hosts_target_denormalised_by_volume | selectattr('auto_volume.src', 'defined') | list }}" + + - name: create/azure | Create VMs asynchronously + 
azure.azcollection.azure_rm_virtualmachine: + client_id: "{{cluster_vars[buildenv].azure_client_id}}" + secret: "{{cluster_vars[buildenv].azure_secret}}" + subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" + tenant: "{{cluster_vars[buildenv].azure_tenant}}" + resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" + admin_username: "{{cluster_vars[buildenv].ssh_connection_cfg.host.ansible_user}}" + custom_data : "{{cluster_vars.user_data | default(omit)}}" + image: "{{cluster_vars.image}}" + managed_disk_type: Standard_LRS + name: "{{item.hostname}}" + os_disk_size_gb: "{{item.os_disk_size_gb | default(omit)}}" +# network_interface_names: "{{r__azure_rm_networkinterface.results | json_query(\"[?item.hostname == `\" + item.hostname + \"`].state.name\") }}" + open_ports: ["9"] # tcp/9 is the 'discard' (dev/null) port. It is set because we must put a value in here, otherwise the default tcp/22 is opened to any/any. azure_rm_securitygroup is set below. + public_ip_allocation_method: "{%- if cluster_vars.assign_public_ip == 'yes' -%}Static{%- else -%}Disabled{%- endif -%}" + ssh_password_enabled: no + ssh_public_keys: + - path: "/home/{{cluster_vars[buildenv].ssh_connection_cfg.host.ansible_user}}/.ssh/authorized_keys" + #The ssh key is either provided on the command line (as 'ansible_ssh_private_key_file'), or as a variable in cluster_vars[buildenv].ssh_connection_cfg.host.ansible_ssh_private_key_file (anchored to _host_ssh_connection_cfg.ansible_ssh_private_key_file); we can slurp the key from either variable, and then ssh-keygen it into the public key (we have to remove the comment though before we add our own, (hence the regex), because this is what gcp expects). + key_data: "{%- if _host_ssh_connection_cfg.ansible_ssh_private_key_file is defined -%}{{ lookup('pipe', 'ssh-keygen -y -f /dev/stdin < 0 + +# - name: create/azure | r__azure_rm_securitygroup +# debug: msg={{r__azure_rm_securitygroup}} + + + - name: create/azure | Create and attach managed disk(s) to VM. Do NOT ATTEMPT to do this asynchronously - causes issues! 
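      # Assumed behaviour, for context: the data disk is attached at the LUN given below (taken from auto_volume.device_name),
      # and the config role's blockdevmap lookup in disks_auto_aws_gcp_azure.yml is expected to resolve that LUN back to an
      # OS device (device_name_cloud) before filesystems are created and mounted.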
+ azure.azcollection.azure_rm_manageddisk: + client_id: "{{cluster_vars[buildenv].azure_client_id}}" + secret: "{{cluster_vars[buildenv].azure_secret}}" + subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" + tenant: "{{cluster_vars[buildenv].azure_tenant}}" + resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" + attach_caching: read_only + disk_size_gb: "{{item.auto_volume.disk_size_gb}}" + lun: "{{item.auto_volume.device_name}}" + managed_by: "{{item.hostname}}" + name: "{{_tags.name}}" + storage_account_type: "{{item.auto_volume.storage_account_type}}" + tags: "{{ _tags | combine(cluster_vars.custom_tagslabels | default({})) }}" + zone: "{{item.az_name}}" + vars: + _tags: + name: "{{item.hostname}}--{{ item.auto_volume.mountpoint | basename }}" + inv_node_version: "{{cluster_vars[buildenv].hosttype_vars[item.hosttype].version | default(omit)}}" + inv_node_type: "{{item.hosttype}}" + owner: "{{ lookup('env','USER') | lower }}" + release: "{{ release_version }}" + loop: "{{ cluster_hosts_target_denormalised_by_volume }}" + register: r__azure_rm_manageddisk + +# - name: create/azure | r__azure_rm_manageddisk +# debug: msg={{r__azure_rm_manageddisk}} diff --git a/create/tasks/gcp.yml b/create/tasks/gcp.yml index 716ee14e..23bac4d2 100644 --- a/create/tasks/gcp.yml +++ b/create/tasks/gcp.yml @@ -1,8 +1,8 @@ --- -- name: create/gcp | Create GCP network and subnetwork (if -e create_gcp_network=true) +- name: create/gcp | Create network and subnetwork (if -e create_gcp_network=true) block: - - name: create/gcp | Create GCP host network (if -e create_gcp_network=true) + - name: create/gcp | Create host network (if -e create_gcp_network=true) gcp_compute_network: name: "{{cluster_vars[buildenv].vpc_network_name}}" auto_create_subnetworks: "{%- if cluster_vars[buildenv].vpc_subnet_name is defined and cluster_vars[buildenv].vpc_subnet_name != '' -%} false {%- else -%} true {%- endif -%}" @@ -11,7 +11,7 @@ service_account_file: "{{gcp_credentials_file}}" register: r__gcp_compute_network - - name: create/gcp | Create GCP host subnetwork (if -e create_gcp_network=true) + - name: create/gcp | Create host subnetwork (if -e create_gcp_network=true) gcp_compute_subnetwork: name: "{{cluster_vars[buildenv].vpc_subnet_name}}" network: "{{r__gcp_compute_network}}" @@ -22,9 +22,9 @@ when: create_gcp_network is defined and create_gcp_network|bool -- name: create/gcp | Create GCP firewalls +- name: create/gcp | Create firewalls block: - - name: create/gcp | Get GCP network facts + - name: create/gcp | Get network facts gcp_compute_network_info: filters: - "name = {{cluster_vars[buildenv].vpc_network_name}}" @@ -37,7 +37,7 @@ - name: "Assert that {{cluster_vars[buildenv].vpc_network_name}} network exists" assert: { that: "r__gcp_compute_network_info['resources'] | length > 0", msg: "The {{cluster_vars[buildenv].vpc_network_name}} network must exist (create with ' -e create_gcp_network=true')" } - - name: create/gcp | Get GCP subnetwork facts + - name: create/gcp | Get subnetwork facts gcp_compute_subnetwork_info: filters: - "name = {{cluster_vars[buildenv].vpc_subnet_name}}" @@ -53,7 +53,7 @@ assert: { that: "r__gcp_compute_subnetwork_info['resources'] | length > 0", msg: "The {{cluster_vars[buildenv].vpc_subnet_name}} subnet must exist" } when: (cluster_vars[buildenv].vpc_subnet_name is defined) and (cluster_vars[buildenv].vpc_subnet_name != "") - - name: create/gcp | Create GCP cluster firewalls + - name: create/gcp | Create cluster firewalls gcp_compute_firewall: name: "{{ 
item.name }}" target_tags: "{{cluster_vars.network_fw_tags}}" @@ -68,7 +68,7 @@ with_items: "{{ cluster_vars.firewall_rules }}" -- name: create/gcp | Create GCP VMs asynchronously and wait for completion +- name: create/gcp | Create VMs asynchronously and wait for completion block: - name: create/gcp | Detach volumes from previous instances (during the _scheme_rmvm_keepdisk_rollback redeploy, we only redeploy one host at a time, and it is already powered off) gce_pd: @@ -82,7 +82,7 @@ name: "{{item.auto_volume.src.source_url | basename}}" loop: "{{ cluster_hosts_target_denormalised_by_volume | selectattr('auto_volume.src', 'defined') | list }}" - - name: create/gcp | Create GCP VMs asynchronously + - name: create/gcp | Create VMs asynchronously gcp_compute_instance: auth_kind: "serviceaccount" service_account_file: "{{gcp_credentials_file}}" diff --git a/dynamic_inventory/tasks/azure.yml b/dynamic_inventory/tasks/azure.yml new file mode 100644 index 00000000..53051335 --- /dev/null +++ b/dynamic_inventory/tasks/azure.yml @@ -0,0 +1,123 @@ +--- + +# Note: Azure, irritatingly, doesn't provide all the info we need for cluster_hosts_target in one place. We have to run each of these, passing the results of the previous into the next. +# + VM info: azure_rm_virtualmachine_info +# + VM AZ info: azure_rm_resource_info +# + Private IP info: azure_rm_networkinterface_info +# + Public IP info: azure_rm_publicipaddress_info + +- name: dynamic_inventory/azure | Get instance info + azure.azcollection.azure_rm_virtualmachine_info: + client_id: "{{cluster_vars[buildenv].azure_client_id}}" + secret: "{{cluster_vars[buildenv].azure_secret}}" + subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" + tenant: "{{cluster_vars[buildenv].azure_tenant}}" + resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" + tags: + - "cluster_name:{{cluster_name}}" + register: r__azure_rm_virtualmachine_info + delegate_to: localhost + run_once: true + +#- name: dynamic_inventory/azure | r__azure_rm_virtualmachine_info +# debug: msg="{{r__azure_rm_virtualmachine_info}}" +# delegate_to: localhost +# run_once: true + +- name: dynamic_inventory/azure | Get instance resource info (for VM AZ info) + azure.azcollection.azure_rm_resource_info: + client_id: "{{cluster_vars[buildenv].azure_client_id}}" + secret: "{{cluster_vars[buildenv].azure_secret}}" + subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" + tenant: "{{cluster_vars[buildenv].azure_tenant}}" + resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" + resource_name: "{{ item.name }}" + resource_type: VirtualMachines + provider: Compute + with_items: "{{ r__azure_rm_virtualmachine_info.vms | json_query(\"[?power_state=='running']\") }}" + register: r__azure_rm_resource_info + delegate_to: localhost + run_once: true + async: 7200 + poll: 0 + +- name: dynamic_inventory/azure | Wait for instance resource info (to get Zone info) + async_status: { jid: "{{ item.ansible_job_id }}" } + register: r__async_status__azure_rm_resource_info + until: r__async_status__azure_rm_resource_info.finished + delay: 3 + retries: 300 + with_items: "{{r__azure_rm_resource_info.results}}" + +#- name: dynamic_inventory/azure | r__async_status__azure_rm_resource_info +# debug: msg="{{r__async_status__azure_rm_resource_info}}" +# delegate_to: localhost +# run_once: true + + +- name: dynamic_inventory/azure | Get network interface info (per instance) + azure.azcollection.azure_rm_networkinterface_info: + client_id: 
"{{cluster_vars[buildenv].azure_client_id}}" + secret: "{{cluster_vars[buildenv].azure_secret}}" + subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" + tenant: "{{cluster_vars[buildenv].azure_tenant}}" + resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" + name: "{{ item.networkInterface | basename }}" + with_items: "{{ r__async_status__azure_rm_resource_info.results | json_query(\"[].response[0].{hosttype: tags.hosttype, hostname: name, networkInterface: properties.networkProfile.networkInterfaces[0].id, regionzone: join('-',[location,zones[0]])}\") }}" + register: r__azure_rm_networkinterface_info + delegate_to: localhost + run_once: true + async: 7200 + poll: 0 + +- name: dynamic_inventory/azure | Wait for network interface info + async_status: { jid: "{{ item.ansible_job_id }}" } + register: r__async_status__azure_rm_networkinterface_info + until: r__async_status__azure_rm_networkinterface_info.finished + delay: 3 + retries: 300 + with_items: "{{r__azure_rm_networkinterface_info.results}}" + +#- name: dynamic_inventory/azure | r__async_status__azure_rm_networkinterface_info +# debug: msg="{{r__async_status__azure_rm_networkinterface_info}}" +# delegate_to: localhost +# run_once: true + + +- name: dynamic_inventory/azure | Get publicipaddress info (per instance) + azure.azcollection.azure_rm_publicipaddress_info: + client_id: "{{cluster_vars[buildenv].azure_client_id}}" + secret: "{{cluster_vars[buildenv].azure_secret}}" + subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" + tenant: "{{cluster_vars[buildenv].azure_tenant}}" + resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" + name: "{{ item.public_ip_id | basename }}" + with_items: "{{ r__async_status__azure_rm_networkinterface_info.results | json_query(\"[].{hosttype: item.item.hosttype, hostname: item.item.hostname, regionzone: item.item.regionzone, private_ip: networkinterfaces[].ip_configurations[0].private_ip_address|[0], public_ip_id: networkinterfaces[].ip_configurations[0].public_ip_address|[0]}\") }}" + register: r__azure_rm_networkinterface_info + delegate_to: localhost + run_once: true + async: 7200 + poll: 0 + +- name: dynamic_inventory/azure | Wait for publicipaddress info + async_status: { jid: "{{ item.ansible_job_id }}" } + register: r__async_status__azure_rm_publicipaddress_info + until: r__async_status__azure_rm_publicipaddress_info.finished + delay: 3 + retries: 300 + with_items: "{{r__azure_rm_networkinterface_info.results}}" + +#- name: dynamic_inventory/azure | r__async_status__azure_rm_publicipaddress_info +# debug: msg="{{r__async_status__azure_rm_publicipaddress_info}}" +# delegate_to: localhost +# run_once: true + +- name: dynamic_inventory/aws | Set dynamic_inventory_flat + set_fact: + dynamic_inventory_flat: | + {%- if cluster_vars.inventory_ip == 'private' -%} + {{ r__async_status__azure_rm_publicipaddress_info.results | json_query('[].{hosttype: item.item.hosttype, hostname: item.item.hostname, regionzone: item.item.regionzone, private_ip: item.item.private_ip, public_ip: publicipaddresses[0].ip_address, inventory_ip: item.item.private_ip}') | default([]) }} + {%- else -%} + {{ r__async_status__azure_rm_publicipaddress_info.results | json_query('[].{hosttype: item.item.hosttype, hostname: item.item.hostname, regionzone: item.item.regionzone, private_ip: item.item.private_ip, public_ip: publicipaddresses[0].ip_address, inventory_ip: publicipaddresses[0].ip_address}') | default([]) }} + {%- endif -%} + diff --git 
a/jenkinsfiles/Jenkinsfile_testsuite b/jenkinsfiles/Jenkinsfile_testsuite index d13e531c..c2d55454 100644 --- a/jenkinsfiles/Jenkinsfile_testsuite +++ b/jenkinsfiles/Jenkinsfile_testsuite @@ -85,7 +85,7 @@ properties([ //disableConcurrentBuilds(), //pipelineTriggers([pollSCM(ignorePostCommitHooks: true, scmpoll_spec: '''H/30 8-19 * * 1-5''')]), parameters([ - extendedChoice(name: 'CLOUD_REGION', type: 'PT_MULTI_SELECT', value: 'esxifree/dougalab,aws/eu-west-1,gcp/europe-west1', description: 'Specify which cloud/region(s) to test', visibleItemCount: 5), + extendedChoice(name: 'CLOUD_REGION', type: 'PT_MULTI_SELECT', value: 'esxifree/dougalab,aws/eu-west-1,gcp/europe-west1,azure/westeurope', description: 'Specify which cloud/region(s) to test', visibleItemCount: 5), choice(name: 'BUILDENV', choices: ['', 'sandbox'], description: "The environment in which to run the tests"), string(name: 'CLUSTER_ID', defaultValue: 'testsuite', trim: true), [name: 'DNS_FORCE_DISABLE', $class: 'ChoiceParameter', choiceType: 'PT_RADIO', description: '', randomName: 'choice-parameter-31196915540455', script: [$class: 'GroovyScript', fallbackScript: [classpath: [], sandbox: true, script: ''], script: [classpath: [], sandbox: true, script: 'return [\'false:selected\',\'true\',\'true,false\']']]], diff --git a/readiness/tasks/main.yml b/readiness/tasks/main.yml index a3d57b01..966ef941 100644 --- a/readiness/tasks/main.yml +++ b/readiness/tasks/main.yml @@ -1,7 +1,7 @@ --- - name: readiness | Remove maintenance mode - include_tasks: remove_maintenance_mode.yml + include_tasks: "remove_maintenance_mode_{{cluster_vars.type}}.yml" when: (prometheus_set_unset_maintenance_mode is defined and prometheus_set_unset_maintenance_mode|bool) - name: readiness | create/update DNS CNAME records diff --git a/readiness/tasks/remove_maintenance_mode.yml b/readiness/tasks/remove_maintenance_mode.yml deleted file mode 100644 index 3ecc5fde..00000000 --- a/readiness/tasks/remove_maintenance_mode.yml +++ /dev/null @@ -1,58 +0,0 @@ ---- - -- block: - - name: remove_maintenance_mode/aws | Get existing AWS EC2 instance info - ec2_instance_info: - filters: - "tag:cluster_name": "{{cluster_name}}" - "instance-state-name": ["running", "stopped"] - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{cluster_vars.region}}" - register: r__ec2_instance_info - delegate_to: localhost - run_once: true - - - name: remove_maintenance_mode/aws | Set maintenance_mode to false - ec2_tag: - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{cluster_vars.region}}" - resource: "{{ item }}" - tags: - maintenance_mode: "false" - delegate_to: localhost - run_once: true - with_items: "{{ r__ec2_instance_info.instances | json_query(\"[].instance_id\") }}" - when: cluster_vars.type == "aws" - -- block: - - name: remove_maintenance_mode/gcp | Get existing GCP GCE instance info (per AZ) - gcp_compute_instance_info: - zone: "{{cluster_vars.region}}-{{item}}" - filters: - - "labels.cluster_name = {{cluster_name}}" - project: "{{cluster_vars[buildenv].vpc_project_id}}" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - scopes: ["https://www.googleapis.com/auth/compute.readonly"] - with_items: "{{ cluster_vars[buildenv].hosttype_vars | json_query(\"*[vms_by_az][][keys(@)][][]\") | unique }}" - register: r__gcp_compute_instance_info - delegate_to: localhost - run_once: true 
- - # Use this because the gce_labels command does not replace existing labels. https://github.com/ansible/ansible/pull/59891 - - name: remove_maintenance_mode/gcp | Set maintenance_mode to false - gcp_compute_instance: - name: "{{item.name}}" - project: "{{cluster_vars[buildenv].vpc_project_id}}" - zone: "{{ item.zone | regex_replace('^.*/(.*)$', '\\1') }}" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - deletion_protection: "{{cluster_vars[buildenv].deletion_protection}}" - status: "{{item.status}}" - labels: "{{ item.labels | combine({'maintenance_mode': 'false'}) }}" - with_items: "{{ r__gcp_compute_instance_info.results | json_query(\"[?resources[?labels]].resources[]\") }}" - delegate_to: localhost - run_once: true - when: cluster_vars.type == "gcp" diff --git a/readiness/tasks/remove_maintenance_mode_aws.yml b/readiness/tasks/remove_maintenance_mode_aws.yml new file mode 100644 index 00000000..f000dc9b --- /dev/null +++ b/readiness/tasks/remove_maintenance_mode_aws.yml @@ -0,0 +1,13 @@ +--- + +- name: remove_maintenance_mode/aws | Set maintenance_mode to false + ec2_tag: + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{cluster_vars.region}}" + resource: "{{ item }}" + tags: + maintenance_mode: "false" + delegate_to: localhost + run_once: true + with_items: "{{ cluster_hosts_state | json_query(\"[].instance_id\") }}" diff --git a/readiness/tasks/remove_maintenance_mode_azure.yml b/readiness/tasks/remove_maintenance_mode_azure.yml new file mode 100644 index 00000000..e130f34a --- /dev/null +++ b/readiness/tasks/remove_maintenance_mode_azure.yml @@ -0,0 +1,30 @@ +--- + +- name: remove_maintenance_mode/azure | Set maintenance_mode=false asynchronously + azure.azcollection.azure_rm_virtualmachine: + client_id: "{{cluster_vars[buildenv].azure_client_id}}" + secret: "{{cluster_vars[buildenv].azure_secret}}" + subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" + tenant: "{{cluster_vars[buildenv].azure_tenant}}" + resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" + append_tags: yes + tags: + maintenance_mode: "false" + name: "{{ item.name }}" + zones: ["{{ (item.regionzone.split('-'))[1] }}"] + register: r__azure_rm_virtualmachine + with_items: "{{ cluster_hosts_state }}" + delegate_to: localhost + run_once: true + async: 7200 + poll: 0 + +- name: remove_maintenance_mode/azure | Wait for maintenance_mode labelling to finish + async_status: + jid: "{{ item.ansible_job_id }}" + register: async_jobs + until: async_jobs.finished + retries: 300 + with_items: "{{r__azure_rm_virtualmachine.results}}" + delegate_to: localhost + run_once: true diff --git a/readiness/tasks/remove_maintenance_mode_esxifree.yml b/readiness/tasks/remove_maintenance_mode_esxifree.yml new file mode 100644 index 00000000..de1369eb --- /dev/null +++ b/readiness/tasks/remove_maintenance_mode_esxifree.yml @@ -0,0 +1,11 @@ +--- + +- name: remove_maintenance_mode/esxifree | Set maintenance_mode to false + esxifree_guest: + hostname: "{{ cluster_vars.esxi_ip }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + name: "{{item.name}}" + state: "unchanged" + annotation: "{{ item.tagslabels | combine({'maintenance_mode': 'false'}) }}" + with_items: "{{ cluster_hosts_state }}" diff --git a/readiness/tasks/remove_maintenance_mode_gcp.yml b/readiness/tasks/remove_maintenance_mode_gcp.yml new file mode 100644 index 
00000000..4de96816 --- /dev/null +++ b/readiness/tasks/remove_maintenance_mode_gcp.yml @@ -0,0 +1,29 @@ +--- + +# Use this because the gce_labels command does not replace existing labels. https://github.com/ansible/ansible/pull/59891 +- name: remove_maintenance_mode/gcp | Set maintenance_mode=false asynchronously + gcp_compute_instance: + name: "{{item.name}}" + project: "{{cluster_vars[buildenv].vpc_project_id}}" + zone: "{{ item.regionzone | regex_replace('^.*/(.*)$', '\\1') }}" + auth_kind: "serviceaccount" + service_account_file: "{{gcp_credentials_file}}" + deletion_protection: "{{cluster_vars[buildenv].deletion_protection}}" + status: "{{item.instance_state}}" + labels: "{{ item.tagslabels | combine({'maintenance_mode': 'false'}) }}" + register: r__gcp_compute_instance + with_items: "{{ cluster_hosts_state }}" + delegate_to: localhost + run_once: true + async: 7200 + poll: 0 + +- name: remove_maintenance_mode/gcp | Wait for maintenance_mode labelling to finish + async_status: + jid: "{{ item.ansible_job_id }}" + register: async_jobs + until: async_jobs.finished + retries: 300 + with_items: "{{r__gcp_compute_instance.results}}" + delegate_to: localhost + run_once: true diff --git a/redeploy/__common/tasks/poweroff_vms.yml b/redeploy/__common/tasks/poweroff_vms.yml deleted file mode 100644 index 3b8fccf0..00000000 --- a/redeploy/__common/tasks/poweroff_vms.yml +++ /dev/null @@ -1,80 +0,0 @@ ---- - -- name: poweroff_vms | hosts_to_stop - debug: msg="{{hosts_to_stop}}" - -- block: - - name: poweroff_vms | Power-off AWS EC2 VM(s) and set maintenance_mode=true - block: - - name: poweroff_vms | Set maintenance_mode label on AWS VM(s) - ec2_tag: - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{cluster_vars.region}}" - resource: "{{ item.instance_id }}" - tags: - maintenance_mode: "true" - with_items: "{{ hosts_to_stop }}" - - - name: poweroff_vms | Power-off AWS EC2 VM(s) - ec2: - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{ cluster_vars.region }}" - state: "stopped" - instance_ids: "{{ hosts_to_stop | json_query(\"[].instance_id\") }}" - wait: true - delegate_to: localhost - run_once: true - when: cluster_vars.type == "aws" - - - - name: poweroff_vms | Power-off GCP GCE VMs asynchronously - block: - - name: poweroff_vms | Power-off GCP GCE VM(s) and set maintenance_mode=true - gcp_compute_instance: - name: "{{item.name}}" - project: "{{cluster_vars[buildenv].vpc_project_id}}" - zone: "{{ item.regionzone }}" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - deletion_protection: "{{cluster_vars[buildenv].deletion_protection}}" - status: "TERMINATED" - labels: "{{ item.tagslabels | combine({'maintenance_mode': 'true'}) }}" - with_items: "{{ hosts_to_stop }}" - register: r__gcp_compute_instance - async: 7200 - poll: 0 - - - name: poweroff_vms | Wait for GCP GCE instance(s) to power-off - async_status: - jid: "{{ item.ansible_job_id }}" - register: async_jobs - until: async_jobs.finished - retries: 300 - with_items: "{{r__gcp_compute_instance.results}}" - when: cluster_vars.type == "gcp" - - - - name: poweroff_vms | Power-off vmware VM(s) and set maintenance_mode=true - block: - - name: poweroff_vms | Set maintenance_mode label on esxifree VM(s) - esxifree_guest: - hostname: "{{ cluster_vars.esxi_ip }}" - username: "{{ cluster_vars.username }}" - password: "{{ cluster_vars.password }}" - 
name: "{{item.name}}" - state: unchanged - annotation: "{{ item.tagslabels | combine({'maintenance_mode': 'true'}) }}" - with_items: "{{ hosts_to_stop }}" - - - name: poweroff_vms | Power-off esxifree VM(s) - esxifree_guest: - hostname: "{{ cluster_vars.esxi_ip }}" - username: "{{ cluster_vars.username }}" - password: "{{ cluster_vars.password }}" - name: "{{item.name}}" - state: shutdownguest - with_items: "{{ hosts_to_stop }}" - when: cluster_vars.type == "esxifree" - when: hosts_to_stop | length \ No newline at end of file diff --git a/redeploy/__common/tasks/poweroff_vms_aws.yml b/redeploy/__common/tasks/poweroff_vms_aws.yml new file mode 100644 index 00000000..b1e6c015 --- /dev/null +++ b/redeploy/__common/tasks/poweroff_vms_aws.yml @@ -0,0 +1,28 @@ +--- + +- name: poweroff_vms/aws | hosts_to_stop + debug: msg="{{hosts_to_stop}}" + +- name: poweroff_vms/aws | Power-off VM(s) and set maintenance_mode=true + block: + - name: poweroff_vms/aws | Set maintenance_mode=true + ec2_tag: + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{cluster_vars.region}}" + resource: "{{ item.instance_id }}" + tags: + maintenance_mode: "true" + with_items: "{{ hosts_to_stop }}" + + - name: poweroff_vms/aws | Power-off VM(s) + ec2: + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{ cluster_vars.region }}" + state: "stopped" + instance_ids: "{{ hosts_to_stop | json_query(\"[].instance_id\") }}" + wait: true + delegate_to: localhost + run_once: true + when: hosts_to_stop | length diff --git a/redeploy/__common/tasks/poweroff_vms_azure.yml b/redeploy/__common/tasks/poweroff_vms_azure.yml new file mode 100644 index 00000000..4d0cbeda --- /dev/null +++ b/redeploy/__common/tasks/poweroff_vms_azure.yml @@ -0,0 +1,33 @@ +--- + +- name: poweroff_vms/azure | hosts_to_stop + debug: msg="{{hosts_to_stop}}" + +- name: poweroff_vms/azure | Power-off VM(s) and set maintenance_mode=true + block: + - name: poweroff_vms/azure | Power-off VM(s) asynchronously and set maintenance_mode=true + azure.azcollection.azure_rm_virtualmachine: + client_id: "{{cluster_vars[buildenv].azure_client_id}}" + secret: "{{cluster_vars[buildenv].azure_secret}}" + subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" + tenant: "{{cluster_vars[buildenv].azure_tenant}}" + resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" + append_tags: yes + tags: + maintenance_mode: "true" + name: "{{ item.name }}" + started: no + zones: ["{{ (item.regionzone.split('-'))[1] }}"] + register: r__azure_rm_virtualmachine + with_items: "{{ hosts_to_stop }}" + async: 7200 + poll: 0 + + - name: poweroff_vms/azure | Wait for VM(s) to power-off + async_status: + jid: "{{ item.ansible_job_id }}" + register: async_jobs + until: async_jobs.finished + retries: 300 + with_items: "{{r__azure_rm_virtualmachine.results}}" + when: hosts_to_stop | length \ No newline at end of file diff --git a/redeploy/__common/tasks/poweroff_vms_esxifree.yml b/redeploy/__common/tasks/poweroff_vms_esxifree.yml new file mode 100644 index 00000000..eecd2e2c --- /dev/null +++ b/redeploy/__common/tasks/poweroff_vms_esxifree.yml @@ -0,0 +1,26 @@ +--- + +- name: poweroff_vms/esxifree | hosts_to_stop + debug: msg="{{hosts_to_stop}}" + +- name: poweroff_vms/esxifree | Power-off VM(s) and set maintenance_mode=true + block: + - name: poweroff_vms/esxifree | Set maintenance_mode=true + esxifree_guest: + 
hostname: "{{ cluster_vars.esxi_ip }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + name: "{{item.name}}" + state: unchanged + annotation: "{{ item.tagslabels | combine({'maintenance_mode': 'true'}) }}" + with_items: "{{ hosts_to_stop }}" + + - name: poweroff_vms/esxifree | Power-off VM(s) + esxifree_guest: + hostname: "{{ cluster_vars.esxi_ip }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + name: "{{item.name}}" + state: shutdownguest + with_items: "{{ hosts_to_stop }}" + when: hosts_to_stop | length \ No newline at end of file diff --git a/redeploy/__common/tasks/poweroff_vms_gcp.yml b/redeploy/__common/tasks/poweroff_vms_gcp.yml new file mode 100644 index 00000000..a2cea310 --- /dev/null +++ b/redeploy/__common/tasks/poweroff_vms_gcp.yml @@ -0,0 +1,30 @@ +--- + +- name: poweroff_vms/gcp | hosts_to_stop + debug: msg="{{hosts_to_stop}}" + +- name: poweroff_vms/gcp | Power-off VM(s) and set maintenance_mode=true + block: + - name: poweroff_vms/gcp | Power-off VMs asynchronously + gcp_compute_instance: + name: "{{item.name}}" + project: "{{cluster_vars[buildenv].vpc_project_id}}" + zone: "{{ item.regionzone }}" + auth_kind: "serviceaccount" + service_account_file: "{{gcp_credentials_file}}" + deletion_protection: "{{cluster_vars[buildenv].deletion_protection}}" + status: "TERMINATED" + labels: "{{ item.tagslabels | combine({'maintenance_mode': 'true'}) }}" + with_items: "{{ hosts_to_stop }}" + register: r__gcp_compute_instance + async: 7200 + poll: 0 + + - name: poweroff_vms/gcp | Wait for VM(s) to power-off + async_status: + jid: "{{ item.ansible_job_id }}" + register: async_jobs + until: async_jobs.finished + retries: 300 + with_items: "{{r__gcp_compute_instance.results}}" + when: hosts_to_stop | length \ No newline at end of file diff --git a/redeploy/__common/tasks/poweron_vms.yml b/redeploy/__common/tasks/poweron_vms.yml deleted file mode 100644 index da950fff..00000000 --- a/redeploy/__common/tasks/poweron_vms.yml +++ /dev/null @@ -1,58 +0,0 @@ ---- - -- name: poweron_vms | hosts_to_start - debug: msg="{{hosts_to_start}}" - -- block: - - name: poweron_vms | Power-on AWS EC2 VM(s) - block: - - name: poweron_vms | Power-on AWS EC2 VM(s) - ec2: - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{ cluster_vars.region }}" - state: "running" - instance_ids: "{{ hosts_to_start | json_query(\"[].instance_id\") }}" - wait: true - delegate_to: localhost - run_once: true - when: cluster_vars.type == "aws" - - - name: poweron_vms | Power-on GCP GCE VM(s) asynchronously - block: - - name: poweron_vms | Power-on GCP GCE VM(s) - gcp_compute_instance: - name: "{{item.name}}" - project: "{{cluster_vars[buildenv].vpc_project_id}}" - zone: "{{ item.regionzone }}" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - deletion_protection: "{{cluster_vars[buildenv].deletion_protection}}" - status: "RUNNING" - labels: "{{ item.tagslabels }}" - with_items: "{{ hosts_to_start }}" - register: r__gcp_compute_instance - async: 7200 - poll: 0 - - - name: poweron_vms | Wait for GCP GCE instance(s) to power-on - async_status: - jid: "{{ item.ansible_job_id }}" - register: async_jobs - until: async_jobs.finished - retries: 300 - with_items: "{{r__gcp_compute_instance.results}}" - when: cluster_vars.type == "gcp" - - - name: poweron_vms | Power-on esxifree VM(s) - block: - - name: poweron_vms | Power-on esxifree VM(s) - 
esxifree_guest: - hostname: "{{ cluster_vars.esxi_ip }}" - username: "{{ cluster_vars.username }}" - password: "{{ cluster_vars.password }}" - name: "{{item.name}}" - state: poweredon - with_items: "{{ hosts_to_start }}" - when: cluster_vars.type == "esxifree" - when: hosts_to_start | length \ No newline at end of file diff --git a/redeploy/__common/tasks/poweron_vms_aws.yml b/redeploy/__common/tasks/poweron_vms_aws.yml new file mode 100644 index 00000000..51eda15e --- /dev/null +++ b/redeploy/__common/tasks/poweron_vms_aws.yml @@ -0,0 +1,18 @@ +--- + +- name: poweron_vms/aws | hosts_to_start + debug: msg="{{hosts_to_start}}" + +- name: poweron_vms/aws | Power-on VM(s) + block: + - name: poweron_vms/aws | Power-on VM(s) + ec2: + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{ cluster_vars.region }}" + state: "running" + instance_ids: "{{ hosts_to_start | json_query(\"[].instance_id\") }}" + wait: true + delegate_to: localhost + run_once: true + when: hosts_to_start | length \ No newline at end of file diff --git a/redeploy/__common/tasks/poweron_vms_azure.yml b/redeploy/__common/tasks/poweron_vms_azure.yml new file mode 100644 index 00000000..f3b1e7ec --- /dev/null +++ b/redeploy/__common/tasks/poweron_vms_azure.yml @@ -0,0 +1,31 @@ +--- + +- name: poweron_vms/azure | hosts_to_start + debug: msg="{{hosts_to_start}}" + +- name: poweron_vms/azure | Power-on VM(s) + block: + - name: poweron_vms/azure | Power-on VM(s) asynchronously + azure.azcollection.azure_rm_virtualmachine: + client_id: "{{cluster_vars[buildenv].azure_client_id}}" + secret: "{{cluster_vars[buildenv].azure_secret}}" + subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" + tenant: "{{cluster_vars[buildenv].azure_tenant}}" + resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" + append_tags: yes + name: "{{ item.name }}" + started: yes + zones: ["{{ (item.regionzone.split('-'))[1] }}"] + register: r__azure_rm_virtualmachine + with_items: "{{ hosts_to_start }}" + async: 7200 + poll: 0 + + - name: poweron_vms/azure | Wait for VM(s) to power-on + async_status: + jid: "{{ item.ansible_job_id }}" + register: async_jobs + until: async_jobs.finished + retries: 300 + with_items: "{{r__azure_rm_virtualmachine.results}}" + when: hosts_to_start | length \ No newline at end of file diff --git a/redeploy/__common/tasks/poweron_vms_esxifree.yml b/redeploy/__common/tasks/poweron_vms_esxifree.yml new file mode 100644 index 00000000..d239aabb --- /dev/null +++ b/redeploy/__common/tasks/poweron_vms_esxifree.yml @@ -0,0 +1,16 @@ +--- + +- name: poweron_vms/esxifree | hosts_to_start + debug: msg="{{hosts_to_start}}" + +- name: poweron_vms/esxifree | Power-on VM(s) + block: + - name: poweron_vms/esxifree | Power-on VM(s) + esxifree_guest: + hostname: "{{ cluster_vars.esxi_ip }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + name: "{{item.name}}" + state: poweredon + with_items: "{{ hosts_to_start }}" + when: hosts_to_start | length diff --git a/redeploy/__common/tasks/poweron_vms_gcp.yml b/redeploy/__common/tasks/poweron_vms_gcp.yml new file mode 100644 index 00000000..e2f9d95e --- /dev/null +++ b/redeploy/__common/tasks/poweron_vms_gcp.yml @@ -0,0 +1,30 @@ +--- + +- name: poweron_vms/gcp | hosts_to_start + debug: msg="{{hosts_to_start}}" + +- name: poweron_vms/gcp | Power-on VM(s) + block: + - name: poweron_vms/gcp | Power-on VM(s) asynchronously + gcp_compute_instance: + name: 
"{{item.name}}" + project: "{{cluster_vars[buildenv].vpc_project_id}}" + zone: "{{ item.regionzone }}" + auth_kind: "serviceaccount" + service_account_file: "{{gcp_credentials_file}}" + deletion_protection: "{{cluster_vars[buildenv].deletion_protection}}" + status: "RUNNING" + labels: "{{ item.tagslabels }}" + with_items: "{{ hosts_to_start }}" + register: r__gcp_compute_instance + async: 7200 + poll: 0 + + - name: poweron_vms/gcp | Wait for VM(s) to power-on + async_status: + jid: "{{ item.ansible_job_id }}" + register: async_jobs + until: async_jobs.finished + retries: 300 + with_items: "{{r__gcp_compute_instance.results}}" + when: hosts_to_start | length diff --git a/redeploy/__common/tasks/set_lifecycle_state_label.yml b/redeploy/__common/tasks/set_lifecycle_state_label.yml deleted file mode 100644 index 33b97b34..00000000 --- a/redeploy/__common/tasks/set_lifecycle_state_label.yml +++ /dev/null @@ -1,43 +0,0 @@ ---- - -- name: set_lifecycle_state_label | hosts_to_relabel - debug: msg="{{hosts_to_relabel}}" - -- block: - - name: set_lifecycle_state_label | Change lifecycle_state label on AWS EC2 VM - ec2_tag: - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{cluster_vars.region}}" - resource: "{{ item.instance_id }}" - tags: - lifecycle_state: "{{new_state}}" - with_items: "{{ hosts_to_relabel }}" - when: cluster_vars.type == "aws" - - - - name: set_lifecycle_state_label | Change lifecycle_state label on GCP GCE VM - gcp_compute_instance: - name: "{{item.name}}" - project: "{{cluster_vars[buildenv].vpc_project_id}}" - zone: "{{ item.regionzone }}" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - deletion_protection: "{{cluster_vars[buildenv].deletion_protection}}" - status: "{{item.instance_state}}" - labels: "{{ item.tagslabels | combine({'lifecycle_state': new_state}) }}" - with_items: "{{ hosts_to_relabel }}" - when: cluster_vars.type == "gcp" - - - - name: set_lifecycle_state_label | Change lifecycle_state label on esxifree VM - esxifree_guest: - hostname: "{{ cluster_vars.esxi_ip }}" - username: "{{ cluster_vars.username }}" - password: "{{ cluster_vars.password }}" - name: "{{item.name}}" - state: "unchanged" - annotation: "{{ item.tagslabels | combine({'lifecycle_state': new_state}) }}" - with_items: "{{ hosts_to_relabel }}" - when: cluster_vars.type == "esxifree" - when: hosts_to_relabel | length \ No newline at end of file diff --git a/redeploy/__common/tasks/set_lifecycle_state_label_aws.yml b/redeploy/__common/tasks/set_lifecycle_state_label_aws.yml new file mode 100644 index 00000000..57ea75b4 --- /dev/null +++ b/redeploy/__common/tasks/set_lifecycle_state_label_aws.yml @@ -0,0 +1,14 @@ +--- + +- name: set_lifecycle_state_label/aws | hosts_to_relabel + debug: msg="{{hosts_to_relabel}}" + +- name: "set_lifecycle_state_label/aws | Change lifecycle_state label to {{new_state}}" + ec2_tag: + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{cluster_vars.region}}" + resource: "{{ item.instance_id }}" + tags: + lifecycle_state: "{{new_state}}" + with_items: "{{ hosts_to_relabel | default([]) }}" diff --git a/redeploy/__common/tasks/set_lifecycle_state_label_azure.yml b/redeploy/__common/tasks/set_lifecycle_state_label_azure.yml new file mode 100644 index 00000000..6aa7060d --- /dev/null +++ b/redeploy/__common/tasks/set_lifecycle_state_label_azure.yml @@ -0,0 +1,18 @@ +--- + +- 
name: set_lifecycle_state_label/azure | hosts_to_relabel + debug: msg="{{hosts_to_relabel}}" + +- name: "set_lifecycle_state_label/azure | Change lifecycle_state label to {{new_state}}" + azure.azcollection.azure_rm_virtualmachine: + client_id: "{{cluster_vars[buildenv].azure_client_id}}" + secret: "{{cluster_vars[buildenv].azure_secret}}" + subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" + tenant: "{{cluster_vars[buildenv].azure_tenant}}" + resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" + append_tags: yes + name: "{{ item.name }}" + tags: + lifecycle_state: "{{ new_state }}" + zones: ["{{ (item.regionzone.split('-'))[1] }}"] + with_items: "{{ hosts_to_relabel | default([]) }}" diff --git a/redeploy/__common/tasks/set_lifecycle_state_label_esxifree.yml b/redeploy/__common/tasks/set_lifecycle_state_label_esxifree.yml new file mode 100644 index 00000000..8598d6ec --- /dev/null +++ b/redeploy/__common/tasks/set_lifecycle_state_label_esxifree.yml @@ -0,0 +1,14 @@ +--- + +- name: set_lifecycle_state_label/esxifree | hosts_to_relabel + debug: msg="{{hosts_to_relabel}}" + +- name: "set_lifecycle_state_label/esxifree | Change lifecycle_state label to {{new_state}}" + esxifree_guest: + hostname: "{{ cluster_vars.esxi_ip }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + name: "{{item.name}}" + state: "unchanged" + annotation: "{{ item.tagslabels | combine({'lifecycle_state': new_state}) }}" + with_items: "{{ hosts_to_relabel | default([]) }}" diff --git a/redeploy/__common/tasks/set_lifecycle_state_label_gcp.yml b/redeploy/__common/tasks/set_lifecycle_state_label_gcp.yml new file mode 100644 index 00000000..93f0986e --- /dev/null +++ b/redeploy/__common/tasks/set_lifecycle_state_label_gcp.yml @@ -0,0 +1,16 @@ +--- + +- name: set_lifecycle_state_label/gcp | hosts_to_relabel + debug: msg="{{hosts_to_relabel}}" + +- name: "set_lifecycle_state_label/gcp | Change lifecycle_state label to {{new_state}}" + gcp_compute_instance: + name: "{{item.name}}" + project: "{{cluster_vars[buildenv].vpc_project_id}}" + zone: "{{ item.regionzone }}" + auth_kind: "serviceaccount" + service_account_file: "{{gcp_credentials_file}}" + deletion_protection: "{{cluster_vars[buildenv].deletion_protection}}" + status: "{{item.instance_state}}" + labels: "{{ item.tagslabels | combine({'lifecycle_state': new_state}) }}" + with_items: "{{ hosts_to_relabel | default([]) }}" diff --git a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml index 30c5f34f..2c5857cc 100644 --- a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml +++ b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/main.yml @@ -27,14 +27,14 @@ block: - assert: { that: "'current' in (cluster_hosts_state | map(attribute='tagslabels.lifecycle_state'))", msg: "ERROR - Cannot tidy when there are no machines in the 'current' lifecycle_state. Please use '-e clean=_all_'." 
} - - import_role: + - include_role: name: clusterverse/clean - tasks_from: clean_dns.yml + tasks_from: dns.yml when: (hosts_to_clean | length) and (cluster_vars.dns_server is defined and cluster_vars.dns_server != "") and (cluster_vars.dns_user_domain is defined and cluster_vars.dns_user_domain != "") - - import_role: + - include_role: name: clusterverse/clean - tasks_from: clean_vms.yml + tasks_from: "{{cluster_vars.type}}.yml" when: (hosts_to_clean | length) - debug: diff --git a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/redeploy.yml b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/redeploy.yml index c774c395..2491c5bb 100644 --- a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/redeploy.yml +++ b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/redeploy.yml @@ -9,7 +9,7 @@ - name: Change lifecycle_state label from 'current' to 'retiring' include_role: name: clusterverse/redeploy/__common - tasks_from: set_lifecycle_state_label.yml + tasks_from: "set_lifecycle_state_label_{{cluster_vars.type}}.yml" vars: hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current']\") }}" new_state: "retiring" @@ -78,7 +78,7 @@ - name: Power off old VMs include_role: name: clusterverse/redeploy/__common - tasks_from: poweroff_vms.yml + tasks_from: "poweroff_vms_{{cluster_vars.type}}.yml" vars: hosts_to_stop: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}" diff --git a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/rescue.yml b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/rescue.yml index 0314c19b..779421c0 100644 --- a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/rescue.yml +++ b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/rescue.yml @@ -11,7 +11,7 @@ - name: rescue | Change lifecycle_state label from 'current' to 'redeployfail' include_role: name: clusterverse/redeploy/__common - tasks_from: set_lifecycle_state_label.yml + tasks_from: "set_lifecycle_state_label_{{cluster_vars.type}}.yml" vars: hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current']\") }}" new_state: "redeployfail" @@ -20,7 +20,7 @@ - name: rescue | Change lifecycle_state label from 'retiring' to 'current' state include_role: name: clusterverse/redeploy/__common - tasks_from: set_lifecycle_state_label.yml + tasks_from: "set_lifecycle_state_label_{{cluster_vars.type}}.yml" vars: hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}" new_state: "current" @@ -57,7 +57,7 @@ - name: rescue | poweroff the failed VMs include_role: name: clusterverse/redeploy/__common - tasks_from: poweroff_vms.yml + tasks_from: "poweroff_vms_{{cluster_vars.type}}.yml" when: hosts_to_stop | length vars: hosts_to_stop: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='redeployfail']\") }}" diff --git a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml index 8556058b..7602e3eb 100644 --- a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml +++ b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml @@ -12,9 +12,9 @@ - name: Redeploy by hosttype; rollback on fail block: - name: Change lifecycle_state label from 'current' to 'retiring' - import_role: + include_role: name: clusterverse/redeploy/__common - tasks_from: set_lifecycle_state_label.yml + tasks_from: "set_lifecycle_state_label_{{cluster_vars.type}}.yml" vars: hosts_to_relabel: "{{ cluster_hosts_state | 
json_query(\"[?tagslabels.lifecycle_state=='current']\") }}" new_state: "retiring" @@ -46,13 +46,13 @@ - name: Power off any other retiring VM(s) that might exist if we're redeploying to a smaller topology. include_role: name: clusterverse/redeploy/__common - tasks_from: poweroff_vms.yml + tasks_from: "poweroff_vms_{{cluster_vars.type}}.yml" vars: hosts_to_stop: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring' && ('\"+ myhosttypes|default('') + \"' == '' || contains(['\"+ myhosttypes|default('') + \"'], tagslabels.hosttype))]\") }}" when: (canary=="finish" or canary=="none") - name: re-acquire cluster_hosts_target and cluster_hosts_state (for tidy - can't be in the tidy block because the block depends on this info being correct) - import_role: + include_role: name: clusterverse/cluster_hosts when: (canary_tidy_on_success is defined and canary_tidy_on_success|bool) @@ -71,14 +71,14 @@ block: - assert: { that: "'current' in (cluster_hosts_state | map(attribute='tagslabels.lifecycle_state'))", msg: "ERROR - Cannot tidy when there are no machines in the 'current' lifecycle_state. Please use '-e clean=_all_'." } - - import_role: + - include_role: name: clusterverse/clean - tasks_from: clean_dns.yml + tasks_from: dns.yml when: (hosts_to_clean | length) and (cluster_vars.dns_server is defined and cluster_vars.dns_server != "") and (cluster_vars.dns_user_domain is defined and cluster_vars.dns_user_domain != "") - - import_role: + - include_role: name: clusterverse/clean - tasks_from: clean_vms.yml + tasks_from: "{{cluster_vars.type}}.yml" when: (hosts_to_clean | length) - debug: diff --git a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/redeploy_by_hosttype_by_host.yml b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/redeploy_by_hosttype_by_host.yml index e1cc40ca..2faad177 100644 --- a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/redeploy_by_hosttype_by_host.yml +++ b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/redeploy_by_hosttype_by_host.yml @@ -30,7 +30,7 @@ - name: Power off old VMs include_role: name: clusterverse/redeploy/__common - tasks_from: poweroff_vms.yml + tasks_from: "poweroff_vms_{{cluster_vars.type}}.yml" vars: hosts_to_stop: "{{ hosts_to_remove }}" diff --git a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/rescue.yml b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/rescue.yml index 5f5e1a7f..48648609 100644 --- a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/rescue.yml +++ b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/rescue.yml @@ -7,7 +7,7 @@ - name: rescue | Power-on the 'retiring' VMs include_role: name: clusterverse/redeploy/__common - tasks_from: poweron_vms.yml + tasks_from: "poweron_vms_{{cluster_vars.type}}.yml" vars: hosts_to_start: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}" @@ -18,7 +18,7 @@ - name: rescue | Change lifecycle_state label from 'current' to 'redeployfail' include_role: name: clusterverse/redeploy/__common - tasks_from: set_lifecycle_state_label.yml + tasks_from: "set_lifecycle_state_label_{{cluster_vars.type}}.yml" vars: hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current']\") }}" new_state: "redeployfail" @@ -26,7 +26,7 @@ - name: rescue | Change lifecycle_state label from 'retiring' to 'current' include_role: name: clusterverse/redeploy/__common - tasks_from: set_lifecycle_state_label.yml + tasks_from: "set_lifecycle_state_label_{{cluster_vars.type}}.yml" vars: hosts_to_relabel: "{{ cluster_hosts_state | 
json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}" new_state: "current" @@ -61,6 +61,6 @@ - name: rescue | Power-off the VMs include_role: name: clusterverse/redeploy/__common - tasks_from: poweroff_vms.yml + tasks_from: "poweroff_vms_{{cluster_vars.type}}.yml" vars: hosts_to_stop: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='redeployfail']\") }}" diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml index 33719ddb..59ed73ab 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml @@ -14,7 +14,7 @@ - name: by_hosttype_by_host | Power off old VM include_role: name: clusterverse/redeploy/__common - tasks_from: poweroff_vms.yml + tasks_from: "poweroff_vms_{{cluster_vars.type}}.yml" vars: _host_to_redeploy_nosuffix: "{{host_to_redeploy.hostname | regex_replace('-(?!.*-).*')}}" #Remove the cluster_suffix from the hostname hosts_to_stop: "{{ cluster_hosts_state | to_json | from_json | json_query(\"[?tagslabels.lifecycle_state!='current' && starts_with(name, '\" + _host_to_redeploy_nosuffix + \"')]\") }}" @@ -29,14 +29,14 @@ when: r__mainclusteryml is failed or (debug_nested_log_output is defined and debug_nested_log_output|bool) - name: by_hosttype_by_host | re-acquire cluster_hosts_state (NOT cluster_hosts_target, as we are augmenting this in _add_src_diskinfo_to_cluster_hosts_target__ (also, it is not affected by change of state)) - import_role: + include_role: name: clusterverse/cluster_hosts - tasks_from: get_cluster_hosts_state.yml + tasks_from: "get_cluster_hosts_state_{{cluster_vars.type}}.yml" - name: by_hosttype_by_host | Power on new VM (not needed for normal redeploy, but for rescue case) include_role: name: clusterverse/redeploy/__common - tasks_from: poweron_vms.yml + tasks_from: "poweron_vms_{{cluster_vars.type}}.yml" vars: hosts_to_start: "{{ cluster_hosts_state | selectattr('name', '==', host_to_redeploy.hostname) | list }}" @@ -45,6 +45,6 @@ name: clusterverse/dynamic_inventory - name: by_hosttype_by_host | re-acquire cluster_hosts_state (NOT cluster_hosts_target, as we are augmenting this in _add_src_diskinfo_to_cluster_hosts_target__ (also, it is not affected by change of state)) - import_role: + include_role: name: clusterverse/cluster_hosts - tasks_from: get_cluster_hosts_state.yml + tasks_from: "get_cluster_hosts_state_{{cluster_vars.type}}.yml" diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml index f95d7820..2b16a089 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/main.yml @@ -10,7 +10,7 @@ - name: Change lifecycle_state label from 'current' to 'retiring' include_role: name: clusterverse/redeploy/__common - tasks_from: set_lifecycle_state_label.yml + tasks_from: "set_lifecycle_state_label_{{cluster_vars.type}}.yml" vars: hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='current']\") }}" new_state: "retiring" @@ -55,7 +55,7 @@ - name: rescue | Change lifecycle_state label from 'current' to 'redeployfail' include_role: name: clusterverse/redeploy/__common - tasks_from: set_lifecycle_state_label.yml + tasks_from: "set_lifecycle_state_label_{{cluster_vars.type}}.yml" vars: hosts_to_relabel: "{{ cluster_hosts_state | 
json_query(\"[?tagslabels.lifecycle_state=='current']\") }}" new_state: "redeployfail" @@ -63,7 +63,7 @@ - name: rescue | Change lifecycle_state label from 'retiring' to 'current' include_role: name: clusterverse/redeploy/__common - tasks_from: set_lifecycle_state_label.yml + tasks_from: "set_lifecycle_state_label_{{cluster_vars.type}}.yml" vars: hosts_to_relabel: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}" new_state: "current" @@ -99,12 +99,12 @@ - include_role: name: clusterverse/clean - tasks_from: clean_dns.yml + tasks_from: dns.yml when: (hosts_to_clean | length) and (cluster_vars.dns_server is defined and cluster_vars.dns_server != "") and (cluster_vars.dns_user_domain is defined and cluster_vars.dns_user_domain != "") - include_role: name: clusterverse/clean - tasks_from: clean_vms.yml + tasks_from: "{{cluster_vars.type}}.yml" when: (hosts_to_clean | length) - debug: diff --git a/redeploy/_scheme_rmvm_rmdisk_only/tasks/by_hosttype_by_host.yml b/redeploy/_scheme_rmvm_rmdisk_only/tasks/by_hosttype_by_host.yml index 4573b744..bb2b3645 100644 --- a/redeploy/_scheme_rmvm_rmdisk_only/tasks/by_hosttype_by_host.yml +++ b/redeploy/_scheme_rmvm_rmdisk_only/tasks/by_hosttype_by_host.yml @@ -9,9 +9,9 @@ hosts_to_remove: "{{ cluster_hosts_state | json_query(\"[?name==`\" + host_to_del.hostname + \"`]\") }}" when: predeleterole is defined and predeleterole != "" -- import_role: +- include_role: name: clusterverse/clean - tasks_from: clean_vms.yml + tasks_from: "{{cluster_vars.type}}.yml" vars: hosts_to_clean: "{{ cluster_hosts_state | json_query(\"[?name==`\" + host_to_del.hostname + \"`]\") }}" From 80f96e0c691d363ae62796f72475a12eeeb6a77f Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Wed, 17 Mar 2021 18:32:18 +0000 Subject: [PATCH 50/58] Simplify powerstate changing --- jenkinsfiles/Jenkinsfile_testsuite | 2 +- .../__common/tasks/powerchange_vms_aws.yml | 28 +++++++++ ...ms_azure.yml => powerchange_vms_azure.yml} | 63 ++++++++++--------- .../tasks/powerchange_vms_esxifree.yml | 28 +++++++++ .../__common/tasks/powerchange_vms_gcp.yml | 31 +++++++++ redeploy/__common/tasks/poweroff_vms_aws.yml | 28 --------- .../__common/tasks/poweroff_vms_azure.yml | 33 ---------- .../__common/tasks/poweroff_vms_esxifree.yml | 26 -------- redeploy/__common/tasks/poweroff_vms_gcp.yml | 30 --------- redeploy/__common/tasks/poweron_vms_aws.yml | 18 ------ .../__common/tasks/poweron_vms_esxifree.yml | 16 ----- redeploy/__common/tasks/poweron_vms_gcp.yml | 30 --------- .../tasks/redeploy.yml | 5 +- .../tasks/rescue.yml | 8 +-- .../tasks/main.yml | 9 ++- .../tasks/redeploy_by_hosttype_by_host.yml | 5 +- .../tasks/rescue.yml | 10 +-- .../tasks/by_hosttype_by_host.yml | 15 +++-- 18 files changed, 152 insertions(+), 233 deletions(-) create mode 100644 redeploy/__common/tasks/powerchange_vms_aws.yml rename redeploy/__common/tasks/{poweron_vms_azure.yml => powerchange_vms_azure.yml} (51%) create mode 100644 redeploy/__common/tasks/powerchange_vms_esxifree.yml create mode 100644 redeploy/__common/tasks/powerchange_vms_gcp.yml delete mode 100644 redeploy/__common/tasks/poweroff_vms_aws.yml delete mode 100644 redeploy/__common/tasks/poweroff_vms_azure.yml delete mode 100644 redeploy/__common/tasks/poweroff_vms_esxifree.yml delete mode 100644 redeploy/__common/tasks/poweroff_vms_gcp.yml delete mode 100644 redeploy/__common/tasks/poweron_vms_aws.yml delete mode 100644 redeploy/__common/tasks/poweron_vms_esxifree.yml delete mode 100644 redeploy/__common/tasks/poweron_vms_gcp.yml 
diff --git a/jenkinsfiles/Jenkinsfile_testsuite b/jenkinsfiles/Jenkinsfile_testsuite index c2d55454..6059e25a 100644 --- a/jenkinsfiles/Jenkinsfile_testsuite +++ b/jenkinsfiles/Jenkinsfile_testsuite @@ -86,7 +86,7 @@ properties([ //pipelineTriggers([pollSCM(ignorePostCommitHooks: true, scmpoll_spec: '''H/30 8-19 * * 1-5''')]), parameters([ extendedChoice(name: 'CLOUD_REGION', type: 'PT_MULTI_SELECT', value: 'esxifree/dougalab,aws/eu-west-1,gcp/europe-west1,azure/westeurope', description: 'Specify which cloud/region(s) to test', visibleItemCount: 5), - choice(name: 'BUILDENV', choices: ['', 'sandbox'], description: "The environment in which to run the tests"), + choice(name: 'BUILDENV', choices: ['', 'dev'], description: "The environment in which to run the tests"), string(name: 'CLUSTER_ID', defaultValue: 'testsuite', trim: true), [name: 'DNS_FORCE_DISABLE', $class: 'ChoiceParameter', choiceType: 'PT_RADIO', description: '', randomName: 'choice-parameter-31196915540455', script: [$class: 'GroovyScript', fallbackScript: [classpath: [], sandbox: true, script: ''], script: [classpath: [], sandbox: true, script: 'return [\'false:selected\',\'true\',\'true,false\']']]], extendedChoice(name: 'REDEPLOY_SCHEME', type: 'PT_CHECKBOX', value: '_scheme_addallnew_rmdisk_rollback,_scheme_addnewvm_rmdisk_rollback,_scheme_rmvm_rmdisk_only,_scheme_rmvm_keepdisk_rollback', defaultValue: '_scheme_addallnew_rmdisk_rollback,_scheme_addnewvm_rmdisk_rollback,_scheme_rmvm_rmdisk_only,_scheme_rmvm_keepdisk_rollback', description: 'Specify which redeploy scheme(s) to test', visibleItemCount: 5), diff --git a/redeploy/__common/tasks/powerchange_vms_aws.yml b/redeploy/__common/tasks/powerchange_vms_aws.yml new file mode 100644 index 00000000..460bf677 --- /dev/null +++ b/redeploy/__common/tasks/powerchange_vms_aws.yml @@ -0,0 +1,28 @@ +--- + +- name: "powerchange_vms/aws | hosts_to_powerchange (to {{powerchange_new_state}})" + debug: msg="{{hosts_to_powerchange}}" + +- name: "powerchange_vms/aws | {{powerchange_new_state}} VM(s) and set maintenance_mode=true (if stopping)" + block: + - name: powerchange_vms/aws | Set maintenance_mode=true (if stopping) + ec2_tag: + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{cluster_vars.region}}" + resource: "{{ item.instance_id }}" + tags: { maintenance_mode: "true" } + with_items: "{{ hosts_to_powerchange }}" + when: "powerchange_new_state == 'stop'" + + - name: "powerchange_vms/aws | {{powerchange_new_state}} VMs" + ec2: + aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" + aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" + region: "{{ cluster_vars.region }}" + state: "{% if powerchange_new_state == 'stop' %}stopped{% else %}running{% endif %}" + instance_ids: "{{ hosts_to_powerchange | json_query(\"[].instance_id\") }}" + wait: true + delegate_to: localhost + run_once: true + when: hosts_to_powerchange | length diff --git a/redeploy/__common/tasks/poweron_vms_azure.yml b/redeploy/__common/tasks/powerchange_vms_azure.yml similarity index 51% rename from redeploy/__common/tasks/poweron_vms_azure.yml rename to redeploy/__common/tasks/powerchange_vms_azure.yml index f3b1e7ec..c49996e9 100644 --- a/redeploy/__common/tasks/poweron_vms_azure.yml +++ b/redeploy/__common/tasks/powerchange_vms_azure.yml @@ -1,31 +1,32 @@ ---- - -- name: poweron_vms/azure | hosts_to_start - debug: msg="{{hosts_to_start}}" - -- name: poweron_vms/azure | Power-on VM(s) - block: - - name: 
poweron_vms/azure | Power-on VM(s) asynchronously - azure.azcollection.azure_rm_virtualmachine: - client_id: "{{cluster_vars[buildenv].azure_client_id}}" - secret: "{{cluster_vars[buildenv].azure_secret}}" - subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" - tenant: "{{cluster_vars[buildenv].azure_tenant}}" - resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" - append_tags: yes - name: "{{ item.name }}" - started: yes - zones: ["{{ (item.regionzone.split('-'))[1] }}"] - register: r__azure_rm_virtualmachine - with_items: "{{ hosts_to_start }}" - async: 7200 - poll: 0 - - - name: poweron_vms/azure | Wait for VM(s) to power-on - async_status: - jid: "{{ item.ansible_job_id }}" - register: async_jobs - until: async_jobs.finished - retries: 300 - with_items: "{{r__azure_rm_virtualmachine.results}}" - when: hosts_to_start | length \ No newline at end of file +--- + +- name: "powerchange_vms/azure | hosts_to_powerchange (to {{powerchange_new_state}})" + debug: msg="{{hosts_to_powerchange}}" + +- name: "powerchange_vms/azure | {{powerchange_new_state}} VM(s) and set maintenance_mode=true (if stopping)" + block: + - name: "powerchange_vms/azure | {{powerchange_new_state}} VMs asynchronously and set maintenance_mode=true (if stopping)" + azure.azcollection.azure_rm_virtualmachine: + client_id: "{{cluster_vars[buildenv].azure_client_id}}" + secret: "{{cluster_vars[buildenv].azure_secret}}" + subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" + tenant: "{{cluster_vars[buildenv].azure_tenant}}" + resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" + append_tags: yes + name: "{{ item.name }}" + tags: "{% if powerchange_new_state == 'stop' %}{'maintenance_mode': 'true'}{% else %}{{omit}}{% endif %}" + started: "{% if powerchange_new_state == 'stop' %}no{% else %}yes{% endif %}" + zones: ["{{ (item.regionzone.split('-'))[1] }}"] + register: r__azure_rm_virtualmachine + with_items: "{{ hosts_to_powerchange }}" + async: 7200 + poll: 0 + + - name: "powerchange_vms/azure | Wait for VM(s) to {{powerchange_new_state}}" + async_status: + jid: "{{ item.ansible_job_id }}" + register: async_jobs + until: async_jobs.finished + retries: 300 + with_items: "{{r__azure_rm_virtualmachine.results}}" + when: hosts_to_powerchange | length diff --git a/redeploy/__common/tasks/powerchange_vms_esxifree.yml b/redeploy/__common/tasks/powerchange_vms_esxifree.yml new file mode 100644 index 00000000..c62d3593 --- /dev/null +++ b/redeploy/__common/tasks/powerchange_vms_esxifree.yml @@ -0,0 +1,28 @@ +--- + +- name: "powerchange_vms/esxifree | hosts_to_powerchange (to {{powerchange_new_state}})" + debug: msg="{{hosts_to_powerchange}}" + +- name: "powerchange_vms/esxifree | {{powerchange_new_state}} VM(s) and set maintenance_mode=true" + block: + - name: powerchange_vms/esxifree | Set maintenance_mode=true (if stopping) + esxifree_guest: + hostname: "{{ cluster_vars.esxi_ip }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + name: "{{item.name}}" + state: unchanged + annotation: "{{ item.tagslabels | combine({'maintenance_mode': 'true'}) }}" + with_items: "{{ hosts_to_powerchange }}" + when: "powerchange_new_state == 'stop'" + + - name: "powerchange_vms/esxifree | {{powerchange_new_state}} VMs asynchronously" + esxifree_guest: + hostname: "{{ cluster_vars.esxi_ip }}" + username: "{{ cluster_vars.username }}" + password: "{{ cluster_vars.password }}" + name: "{{item.name}}" + state: "{% if powerchange_new_state == 'stop' 
%}shutdownguest{% else %}poweredon{% endif %}" + with_items: "{{ hosts_to_powerchange }}" + when: hosts_to_powerchange | length + \ No newline at end of file diff --git a/redeploy/__common/tasks/powerchange_vms_gcp.yml b/redeploy/__common/tasks/powerchange_vms_gcp.yml new file mode 100644 index 00000000..6cff2daa --- /dev/null +++ b/redeploy/__common/tasks/powerchange_vms_gcp.yml @@ -0,0 +1,31 @@ +--- + +- name: "powerchange_vms/gcp | hosts_to_powerchange (to {{powerchange_new_state}})" + debug: msg="{{hosts_to_powerchange}}" + +- name: "powerchange_vms/gcp | {{powerchange_new_state}} VM(s) and set maintenance_mode=true" + block: + - name: "powerchange_vms/gcp | {{powerchange_new_state}} VMs asynchronously and set maintenance_mode=true (if stopping)" + gcp_compute_instance: + name: "{{item.name}}" + project: "{{cluster_vars[buildenv].vpc_project_id}}" + zone: "{{ item.regionzone }}" + auth_kind: "serviceaccount" + service_account_file: "{{gcp_credentials_file}}" + deletion_protection: "{{cluster_vars[buildenv].deletion_protection}}" + status: "{% if powerchange_new_state == 'stop' %}TERMINATED{% else %}RUNNING{% endif %}" + labels: "{% if powerchange_new_state == 'stop' %}{{ item.tagslabels | combine({'maintenance_mode': 'true'}) }}{% else %}{{omit}}{% endif %}" + with_items: "{{ hosts_to_powerchange }}" + register: r__gcp_compute_instance + async: 7200 + poll: 0 + + - name: "powerchange_vms/gcp | Wait for VM(s) to {{powerchange_new_state}}" + async_status: + jid: "{{ item.ansible_job_id }}" + register: async_jobs + until: async_jobs.finished + retries: 300 + with_items: "{{r__gcp_compute_instance.results}}" + when: hosts_to_powerchange | length + \ No newline at end of file diff --git a/redeploy/__common/tasks/poweroff_vms_aws.yml b/redeploy/__common/tasks/poweroff_vms_aws.yml deleted file mode 100644 index b1e6c015..00000000 --- a/redeploy/__common/tasks/poweroff_vms_aws.yml +++ /dev/null @@ -1,28 +0,0 @@ ---- - -- name: poweroff_vms/aws | hosts_to_stop - debug: msg="{{hosts_to_stop}}" - -- name: poweroff_vms/aws | Power-off VM(s) and set maintenance_mode=true - block: - - name: poweroff_vms/aws | Set maintenance_mode=true - ec2_tag: - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{cluster_vars.region}}" - resource: "{{ item.instance_id }}" - tags: - maintenance_mode: "true" - with_items: "{{ hosts_to_stop }}" - - - name: poweroff_vms/aws | Power-off VM(s) - ec2: - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{ cluster_vars.region }}" - state: "stopped" - instance_ids: "{{ hosts_to_stop | json_query(\"[].instance_id\") }}" - wait: true - delegate_to: localhost - run_once: true - when: hosts_to_stop | length diff --git a/redeploy/__common/tasks/poweroff_vms_azure.yml b/redeploy/__common/tasks/poweroff_vms_azure.yml deleted file mode 100644 index 4d0cbeda..00000000 --- a/redeploy/__common/tasks/poweroff_vms_azure.yml +++ /dev/null @@ -1,33 +0,0 @@ ---- - -- name: poweroff_vms/azure | hosts_to_stop - debug: msg="{{hosts_to_stop}}" - -- name: poweroff_vms/azure | Power-off VM(s) and set maintenance_mode=true - block: - - name: poweroff_vms/azure | Power-off VM(s) asynchronously and set maintenance_mode=true - azure.azcollection.azure_rm_virtualmachine: - client_id: "{{cluster_vars[buildenv].azure_client_id}}" - secret: "{{cluster_vars[buildenv].azure_secret}}" - subscription_id: 
"{{cluster_vars[buildenv].azure_subscription_id}}" - tenant: "{{cluster_vars[buildenv].azure_tenant}}" - resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" - append_tags: yes - tags: - maintenance_mode: "true" - name: "{{ item.name }}" - started: no - zones: ["{{ (item.regionzone.split('-'))[1] }}"] - register: r__azure_rm_virtualmachine - with_items: "{{ hosts_to_stop }}" - async: 7200 - poll: 0 - - - name: poweroff_vms/azure | Wait for VM(s) to power-off - async_status: - jid: "{{ item.ansible_job_id }}" - register: async_jobs - until: async_jobs.finished - retries: 300 - with_items: "{{r__azure_rm_virtualmachine.results}}" - when: hosts_to_stop | length \ No newline at end of file diff --git a/redeploy/__common/tasks/poweroff_vms_esxifree.yml b/redeploy/__common/tasks/poweroff_vms_esxifree.yml deleted file mode 100644 index eecd2e2c..00000000 --- a/redeploy/__common/tasks/poweroff_vms_esxifree.yml +++ /dev/null @@ -1,26 +0,0 @@ ---- - -- name: poweroff_vms/esxifree | hosts_to_stop - debug: msg="{{hosts_to_stop}}" - -- name: poweroff_vms/esxifree | Power-off VM(s) and set maintenance_mode=true - block: - - name: poweroff_vms/esxifree | Set maintenance_mode=true - esxifree_guest: - hostname: "{{ cluster_vars.esxi_ip }}" - username: "{{ cluster_vars.username }}" - password: "{{ cluster_vars.password }}" - name: "{{item.name}}" - state: unchanged - annotation: "{{ item.tagslabels | combine({'maintenance_mode': 'true'}) }}" - with_items: "{{ hosts_to_stop }}" - - - name: poweroff_vms/esxifree | Power-off VM(s) - esxifree_guest: - hostname: "{{ cluster_vars.esxi_ip }}" - username: "{{ cluster_vars.username }}" - password: "{{ cluster_vars.password }}" - name: "{{item.name}}" - state: shutdownguest - with_items: "{{ hosts_to_stop }}" - when: hosts_to_stop | length \ No newline at end of file diff --git a/redeploy/__common/tasks/poweroff_vms_gcp.yml b/redeploy/__common/tasks/poweroff_vms_gcp.yml deleted file mode 100644 index a2cea310..00000000 --- a/redeploy/__common/tasks/poweroff_vms_gcp.yml +++ /dev/null @@ -1,30 +0,0 @@ ---- - -- name: poweroff_vms/gcp | hosts_to_stop - debug: msg="{{hosts_to_stop}}" - -- name: poweroff_vms/gcp | Power-off VM(s) and set maintenance_mode=true - block: - - name: poweroff_vms/gcp | Power-off VMs asynchronously - gcp_compute_instance: - name: "{{item.name}}" - project: "{{cluster_vars[buildenv].vpc_project_id}}" - zone: "{{ item.regionzone }}" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - deletion_protection: "{{cluster_vars[buildenv].deletion_protection}}" - status: "TERMINATED" - labels: "{{ item.tagslabels | combine({'maintenance_mode': 'true'}) }}" - with_items: "{{ hosts_to_stop }}" - register: r__gcp_compute_instance - async: 7200 - poll: 0 - - - name: poweroff_vms/gcp | Wait for VM(s) to power-off - async_status: - jid: "{{ item.ansible_job_id }}" - register: async_jobs - until: async_jobs.finished - retries: 300 - with_items: "{{r__gcp_compute_instance.results}}" - when: hosts_to_stop | length \ No newline at end of file diff --git a/redeploy/__common/tasks/poweron_vms_aws.yml b/redeploy/__common/tasks/poweron_vms_aws.yml deleted file mode 100644 index 51eda15e..00000000 --- a/redeploy/__common/tasks/poweron_vms_aws.yml +++ /dev/null @@ -1,18 +0,0 @@ ---- - -- name: poweron_vms/aws | hosts_to_start - debug: msg="{{hosts_to_start}}" - -- name: poweron_vms/aws | Power-on VM(s) - block: - - name: poweron_vms/aws | Power-on VM(s) - ec2: - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - 
aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{ cluster_vars.region }}" - state: "running" - instance_ids: "{{ hosts_to_start | json_query(\"[].instance_id\") }}" - wait: true - delegate_to: localhost - run_once: true - when: hosts_to_start | length \ No newline at end of file diff --git a/redeploy/__common/tasks/poweron_vms_esxifree.yml b/redeploy/__common/tasks/poweron_vms_esxifree.yml deleted file mode 100644 index d239aabb..00000000 --- a/redeploy/__common/tasks/poweron_vms_esxifree.yml +++ /dev/null @@ -1,16 +0,0 @@ ---- - -- name: poweron_vms/esxifree | hosts_to_start - debug: msg="{{hosts_to_start}}" - -- name: poweron_vms/esxifree | Power-on VM(s) - block: - - name: poweron_vms/esxifree | Power-on VM(s) - esxifree_guest: - hostname: "{{ cluster_vars.esxi_ip }}" - username: "{{ cluster_vars.username }}" - password: "{{ cluster_vars.password }}" - name: "{{item.name}}" - state: poweredon - with_items: "{{ hosts_to_start }}" - when: hosts_to_start | length diff --git a/redeploy/__common/tasks/poweron_vms_gcp.yml b/redeploy/__common/tasks/poweron_vms_gcp.yml deleted file mode 100644 index e2f9d95e..00000000 --- a/redeploy/__common/tasks/poweron_vms_gcp.yml +++ /dev/null @@ -1,30 +0,0 @@ ---- - -- name: poweron_vms/gcp | hosts_to_start - debug: msg="{{hosts_to_start}}" - -- name: poweron_vms/gcp | Power-on VM(s) - block: - - name: poweron_vms/gcp | Power-on VM(s) asynchronously - gcp_compute_instance: - name: "{{item.name}}" - project: "{{cluster_vars[buildenv].vpc_project_id}}" - zone: "{{ item.regionzone }}" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - deletion_protection: "{{cluster_vars[buildenv].deletion_protection}}" - status: "RUNNING" - labels: "{{ item.tagslabels }}" - with_items: "{{ hosts_to_start }}" - register: r__gcp_compute_instance - async: 7200 - poll: 0 - - - name: poweron_vms/gcp | Wait for VM(s) to power-on - async_status: - jid: "{{ item.ansible_job_id }}" - register: async_jobs - until: async_jobs.finished - retries: 300 - with_items: "{{r__gcp_compute_instance.results}}" - when: hosts_to_start | length diff --git a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/redeploy.yml b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/redeploy.yml index 2491c5bb..587e94b9 100644 --- a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/redeploy.yml +++ b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/redeploy.yml @@ -78,9 +78,10 @@ - name: Power off old VMs include_role: name: clusterverse/redeploy/__common - tasks_from: "poweroff_vms_{{cluster_vars.type}}.yml" + tasks_from: "powerchange_vms_{{cluster_vars.type}}.yml" vars: - hosts_to_stop: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}" + hosts_to_powerchange: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}" + powerchange_new_state: "stop" - name: re-acquire cluster_hosts_target and cluster_hosts_state (for tidy) import_role: diff --git a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/rescue.yml b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/rescue.yml index 779421c0..84882e10 100644 --- a/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/rescue.yml +++ b/redeploy/_scheme_addallnew_rmdisk_rollback/tasks/rescue.yml @@ -57,8 +57,8 @@ - name: rescue | poweroff the failed VMs include_role: name: clusterverse/redeploy/__common - tasks_from: "poweroff_vms_{{cluster_vars.type}}.yml" - when: hosts_to_stop | length + tasks_from: "powerchange_vms_{{cluster_vars.type}}.yml" + when: 
hosts_to_powerchange | length vars: - hosts_to_stop: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='redeployfail']\") }}" - + hosts_to_powerchange: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='redeployfail']\") }}" + powerchange_new_state: "stop" diff --git a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml index 7602e3eb..600cdee0 100644 --- a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml +++ b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/main.yml @@ -41,14 +41,17 @@ name: "{{predeleterole}}" when: predeleterole is defined and predeleterole != "" vars: - hosts_to_remove: "{{ hosts_to_stop | json_query(\"[?contains('RUNNING,running,poweredOn', instance_state)]\") }}" + hosts_to_remove: "{{ hosts_to_change | json_query(\"[?contains('RUNNING,running,poweredOn', instance_state)]\") }}" - name: Power off any other retiring VM(s) that might exist if we're redeploying to a smaller topology. include_role: name: clusterverse/redeploy/__common - tasks_from: "poweroff_vms_{{cluster_vars.type}}.yml" + tasks_from: "powerchange_vms_{{cluster_vars.type}}.yml" + vars: + hosts_to_powerchange: "{{ hosts_to_change }}" + powerchange_new_state: "stop" vars: - hosts_to_stop: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring' && ('\"+ myhosttypes|default('') + \"' == '' || contains(['\"+ myhosttypes|default('') + \"'], tagslabels.hosttype))]\") }}" + hosts_to_change: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring' && ('\"+ myhosttypes|default('') + \"' == '' || contains(['\"+ myhosttypes|default('') + \"'], tagslabels.hosttype))]\") }}" when: (canary=="finish" or canary=="none") - name: re-acquire cluster_hosts_target and cluster_hosts_state (for tidy - can't be in the tidy block because the block depends on this info being correct) diff --git a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/redeploy_by_hosttype_by_host.yml b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/redeploy_by_hosttype_by_host.yml index 2faad177..cd185693 100644 --- a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/redeploy_by_hosttype_by_host.yml +++ b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/redeploy_by_hosttype_by_host.yml @@ -30,9 +30,10 @@ - name: Power off old VMs include_role: name: clusterverse/redeploy/__common - tasks_from: "poweroff_vms_{{cluster_vars.type}}.yml" + tasks_from: "powerchange_vms_{{cluster_vars.type}}.yml" vars: - hosts_to_stop: "{{ hosts_to_remove }}" + hosts_to_powerchange: "{{ hosts_to_remove }}" + powerchange_new_state: "stop" - name: re-acquire the dynamic inventory include_role: diff --git a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/rescue.yml b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/rescue.yml index 48648609..f7e4f235 100644 --- a/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/rescue.yml +++ b/redeploy/_scheme_addnewvm_rmdisk_rollback/tasks/rescue.yml @@ -7,9 +7,10 @@ - name: rescue | Power-on the 'retiring' VMs include_role: name: clusterverse/redeploy/__common - tasks_from: "poweron_vms_{{cluster_vars.type}}.yml" + tasks_from: "powerchange_vms_{{cluster_vars.type}}.yml" vars: - hosts_to_start: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}" + hosts_to_powerchange: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='retiring']\") }}" + powerchange_new_state: "start" - name: rescue | re-acquire cluster_hosts_target and cluster_hosts_state 
import_role: @@ -61,6 +62,7 @@ - name: rescue | Power-off the VMs include_role: name: clusterverse/redeploy/__common - tasks_from: "poweroff_vms_{{cluster_vars.type}}.yml" + tasks_from: "powerchange_vms_{{cluster_vars.type}}.yml" vars: - hosts_to_stop: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='redeployfail']\") }}" + hosts_to_powerchange: "{{ cluster_hosts_state | json_query(\"[?tagslabels.lifecycle_state=='redeployfail']\") }}" + powerchange_new_state: "stop" diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml index 59ed73ab..cf93d87b 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/by_hosttype_by_host.yml @@ -8,16 +8,20 @@ include_role: name: "{{predeleterole}}" vars: - hosts_to_remove: "{{ hosts_to_stop }}" + hosts_to_remove: "{{ hosts_to_change }}" when: predeleterole is defined and predeleterole != "" - name: by_hosttype_by_host | Power off old VM include_role: name: clusterverse/redeploy/__common - tasks_from: "poweroff_vms_{{cluster_vars.type}}.yml" + tasks_from: "powerchange_vms_{{cluster_vars.type}}.yml" + vars: + hosts_to_powerchange: "{{ hosts_to_change }}" + powerchange_new_state: "stop" + vars: _host_to_redeploy_nosuffix: "{{host_to_redeploy.hostname | regex_replace('-(?!.*-).*')}}" #Remove the cluster_suffix from the hostname - hosts_to_stop: "{{ cluster_hosts_state | to_json | from_json | json_query(\"[?tagslabels.lifecycle_state!='current' && starts_with(name, '\" + _host_to_redeploy_nosuffix + \"')]\") }}" + hosts_to_change: "{{ cluster_hosts_state | to_json | from_json | json_query(\"[?tagslabels.lifecycle_state!='current' && starts_with(name, '\" + _host_to_redeploy_nosuffix + \"')]\") }}" - name: "by_hosttype_by_host | Run {{mainclusteryml}} to add {{host_to_redeploy.hostname}} to cluster" shell: "{{ (argv | join(' ')) | regex_replace('redeploy.yml', mainclusteryml) }} -e cluster_suffix={{cluster_suffix}} -e '{'cluster_hosts_target': [{{host_to_redeploy | to_json}}]}'" @@ -36,9 +40,10 @@ - name: by_hosttype_by_host | Power on new VM (not needed for normal redeploy, but for rescue case) include_role: name: clusterverse/redeploy/__common - tasks_from: "poweron_vms_{{cluster_vars.type}}.yml" + tasks_from: "powerchange_vms_{{cluster_vars.type}}.yml" vars: - hosts_to_start: "{{ cluster_hosts_state | selectattr('name', '==', host_to_redeploy.hostname) | list }}" + hosts_to_powerchange: "{{ cluster_hosts_state | selectattr('name', '==', host_to_redeploy.hostname) | list }}" + powerchange_new_state: "start" - name: by_hosttype_by_host | re-acquire the dynamic inventory include_role: From ac8b31e4dd3ff6dc357c47018cc01a0fb22b15e4 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Sun, 21 Mar 2021 09:52:15 +0000 Subject: [PATCH 51/58] Fix check for 'clean' variable being defined. 
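A minimal, illustrative sketch of the guard pattern this patch applies (the task name and message here are hypothetical; the real tasks are in the diff below). Testing 'clean is defined' first means the comparison is never evaluated against an undefined variable, which would otherwise raise an undefined-variable error when the play is run without '-e clean=...':
- name: Example | only remove shared cloud resources on a full clean
  debug:
    msg: "would delete the cluster-wide security group / network here"
  when: clean is defined and clean == '_all_'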
--- clean/tasks/aws.yml | 2 +- clean/tasks/azure.yml | 2 +- clean/tasks/gcp.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clean/tasks/aws.yml b/clean/tasks/aws.yml index aad3fee1..5078a43f 100644 --- a/clean/tasks/aws.yml +++ b/clean/tasks/aws.yml @@ -33,4 +33,4 @@ name: "{{ cluster_name }}-sg" vpc_id: "{{vpc_id}}" state: absent - when: clean == '_all_' + when: clean is defined and clean == '_all_' diff --git a/clean/tasks/azure.yml b/clean/tasks/azure.yml index 7d049767..a8480f7c 100644 --- a/clean/tasks/azure.yml +++ b/clean/tasks/azure.yml @@ -143,4 +143,4 @@ # delay: 3 # retries: 300 # with_items: "{{r__azure_rm_networkinterface.results}}" -# when: clean == '_all_' +# when: clean is defined and clean == '_all_' diff --git a/clean/tasks/gcp.yml b/clean/tasks/gcp.yml index a37910a8..af82e1c0 100644 --- a/clean/tasks/gcp.yml +++ b/clean/tasks/gcp.yml @@ -59,4 +59,4 @@ project: "{{cluster_vars[buildenv].vpc_host_project_id}}" state: absent when: create_gcp_network is defined and create_gcp_network|bool - when: clean == '_all_' + when: clean is defined and clean == '_all_' \ No newline at end of file From b7a98a5f28024ad2850345c722039f18a5aed1c8 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Sat, 27 Mar 2021 08:16:45 +0000 Subject: [PATCH 52/58] Update blockdevmap. Simplify dynamic_inventory. + Update blockdevmap.py to latest to fix problem with non-nitro AWS ephemeral disks. + Reuse get_cluster_hosts_state_{{buildenv}}.yml code to build dynamic inventory. --- .../sandbox/cluster_vars__buildenv.yml | 7 + EXAMPLE/cluster_defs/cluster_vars.yml | 4 +- EXAMPLE/clusterverse_label_upgrade_v1-v2.yml | 2 +- _dependencies/library/blockdevmap.py | 2 +- clean/tasks/gcp.yml | 2 +- .../tasks/get_cluster_hosts_state_aws.yml | 2 +- .../tasks/get_cluster_hosts_state_azure.yml | 80 +++++++++++- .../get_cluster_hosts_state_esxifree.yml | 2 +- .../tasks/get_cluster_hosts_state_gcp.yml | 2 +- dynamic_inventory/tasks/aws.yml | 23 ---- dynamic_inventory/tasks/azure.yml | 123 ------------------ dynamic_inventory/tasks/esxifree.yml | 45 ------- dynamic_inventory/tasks/gcp.yml | 33 ----- dynamic_inventory/tasks/main.yml | 33 ++--- .../remove_maintenance_mode_esxifree.yml | 2 +- 15 files changed, 110 insertions(+), 252 deletions(-) delete mode 100644 dynamic_inventory/tasks/aws.yml delete mode 100644 dynamic_inventory/tasks/azure.yml delete mode 100644 dynamic_inventory/tasks/esxifree.yml delete mode 100644 dynamic_inventory/tasks/gcp.yml diff --git a/EXAMPLE/cluster_defs/aws/testid/eu-west-1/sandbox/cluster_vars__buildenv.yml b/EXAMPLE/cluster_defs/aws/testid/eu-west-1/sandbox/cluster_vars__buildenv.yml index af0cd5d3..b0b529e6 100644 --- a/EXAMPLE/cluster_defs/aws/testid/eu-west-1/sandbox/cluster_vars__buildenv.yml +++ b/EXAMPLE/cluster_defs/aws/testid/eu-west-1/sandbox/cluster_vars__buildenv.yml @@ -50,6 +50,13 @@ cluster_vars: version: "{{sysdisks_version | default('')}}" vms_by_az: { a: 1, b: 1, c: 0 } + hostnvme-notnitro: + auto_volumes: + - { device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", volume_type: "ephemeral", ephemeral: ephemeral0 } + flavor: i3.large + version: "{{sys_version | default('')}}" + vms_by_az: { a: 1, b: 1, c: 0 } + hostnvme-multi: auto_volumes: - { device_name: "/dev/sdb", mountpoint: "/media/mysvc", fstype: "ext4", volume_type: "ephemeral", ephemeral: ephemeral0 } diff --git a/EXAMPLE/cluster_defs/cluster_vars.yml b/EXAMPLE/cluster_defs/cluster_vars.yml index 76a9eacc..99ed20ec 100644 --- a/EXAMPLE/cluster_defs/cluster_vars.yml +++ 
b/EXAMPLE/cluster_defs/cluster_vars.yml @@ -10,8 +10,8 @@ redeploy_schemes_supported: ['_scheme_addallnew_rmdisk_rollback', '_scheme_addne skip_dynamic_inventory_sshwait: true test_touch_disks: true -app_name: "{{lookup('pipe', 'whoami')}}-test" # The name of the application cluster (e.g. 'couchbase', 'nginx'); becomes part of cluster_name. Provided is a default to ensure no accidental overwriting. -app_class: "test" # The class of application (e.g. 'database', 'webserver'); becomes part of the fqdn +app_name: "{{lookup('pipe', 'whoami') | lower}}-test" # The name of the application cluster (e.g. 'couchbase', 'nginx'); becomes part of cluster_name. Provided is a default to ensure no accidental overwriting. +app_class: "test" # The class of application (e.g. 'database', 'webserver'); becomes part of the fqdn beats_config: filebeat: diff --git a/EXAMPLE/clusterverse_label_upgrade_v1-v2.yml b/EXAMPLE/clusterverse_label_upgrade_v1-v2.yml index 65dd512c..4c0860b9 100644 --- a/EXAMPLE/clusterverse_label_upgrade_v1-v2.yml +++ b/EXAMPLE/clusterverse_label_upgrade_v1-v2.yml @@ -5,7 +5,7 @@ connection: local gather_facts: true tasks: - - include: + - include_role: name: 'clusterverse/_dependencies' - include_role: diff --git a/_dependencies/library/blockdevmap.py b/_dependencies/library/blockdevmap.py index b6a8eeab..3c10351e 100644 --- a/_dependencies/library/blockdevmap.py +++ b/_dependencies/library/blockdevmap.py @@ -427,7 +427,7 @@ def __init__(self, **kwds): self.module.fail_json(msg=os_device_path + ": FileNotFoundError" + str(e)) except TypeError as e: if instance_store_count < len(instance_store_map): - os_device.update({"device_name_os": os_device_path, "device_name_cloud": '/dev/' + instance_store_map[instance_store_count]['ephemeral_map'], "volume_id": dev.get_volume_id()}) + os_device.update({"device_name_os": os_device_path, "device_name_cloud": '/dev/' + instance_store_map[instance_store_count]['ephemeral_map'], "volume_id": instance_store_map[instance_store_count]['ephemeral_id']}) instance_store_count += 1 else: self.module.warn(u"%s is not an EBS device and there is no instance store mapping." 
% os_device_path) diff --git a/clean/tasks/gcp.yml b/clean/tasks/gcp.yml index af82e1c0..d20ea3a7 100644 --- a/clean/tasks/gcp.yml +++ b/clean/tasks/gcp.yml @@ -59,4 +59,4 @@ project: "{{cluster_vars[buildenv].vpc_host_project_id}}" state: absent when: create_gcp_network is defined and create_gcp_network|bool - when: clean is defined and clean == '_all_' \ No newline at end of file + when: clean is defined and clean == '_all_' diff --git a/cluster_hosts/tasks/get_cluster_hosts_state_aws.yml b/cluster_hosts/tasks/get_cluster_hosts_state_aws.yml index 63d8bc25..f3546ebf 100644 --- a/cluster_hosts/tasks/get_cluster_hosts_state_aws.yml +++ b/cluster_hosts/tasks/get_cluster_hosts_state_aws.yml @@ -14,4 +14,4 @@ - name: get_cluster_hosts_state/aws | Set cluster_hosts_state set_fact: - cluster_hosts_state: "{{r__ec2_instance_info.instances | json_query(\"[].{name: tags.Name, regionzone: placement.availability_zone, tagslabels: tags, instance_id: instance_id, instance_state: state.name}\") }}" + cluster_hosts_state: "{{r__ec2_instance_info.instances | json_query(\"[].{name: tags.Name, regionzone: placement.availability_zone, tagslabels: tags, instance_id: instance_id, instance_state: state.name, ipv4: {private: private_ip_address, public: public_ip_address} }\") }}" diff --git a/cluster_hosts/tasks/get_cluster_hosts_state_azure.yml b/cluster_hosts/tasks/get_cluster_hosts_state_azure.yml index 70a29d08..4a344c91 100644 --- a/cluster_hosts/tasks/get_cluster_hosts_state_azure.yml +++ b/cluster_hosts/tasks/get_cluster_hosts_state_azure.yml @@ -1,5 +1,11 @@ --- +# Note: Azure, irritatingly, doesn't provide all the info we need for cluster_hosts_state/dynamic_inventory in one place. We have to run each of these, passing the results of the previous into the next. 
+# + VM info: azure_rm_virtualmachine_info +# + VM AZ info: azure_rm_resource_info +# + Private IP info: azure_rm_networkinterface_info +# + Public IP info: azure_rm_publicipaddress_info + - name: get_cluster_hosts_state/azure | Get existing instance info azure.azcollection.azure_rm_virtualmachine_info: client_id: "{{cluster_vars[buildenv].azure_client_id}}" @@ -13,7 +19,12 @@ delegate_to: localhost run_once: true -- name: get_cluster_hosts_state/azure | Get zone instance info +#- name: get_cluster_hosts_state/azure | r__azure_rm_virtualmachine_info +# debug: msg="{{r__azure_rm_virtualmachine_info}}" +# delegate_to: localhost +# run_once: true + +- name: get_cluster_hosts_state/azure | Get instance resource info (for VM AZ info) azure.azcollection.azure_rm_resource_info: client_id: "{{cluster_vars[buildenv].azure_client_id}}" secret: "{{cluster_vars[buildenv].azure_secret}}" @@ -31,11 +42,72 @@ delegate_to: localhost run_once: true -- name: get_cluster_hosts_state/azure | r__azure_rm_resource_info - debug: msg="{{r__azure_rm_resource_info}}" +#- name: get_cluster_hosts_state/azure | r__azure_rm_resource_info +# debug: msg="{{r__azure_rm_resource_info}}" +# delegate_to: localhost +# run_once: true + +- name: get_cluster_hosts_state/azure | Get network interface info (per instance) + azure.azcollection.azure_rm_networkinterface_info: + client_id: "{{cluster_vars[buildenv].azure_client_id}}" + secret: "{{cluster_vars[buildenv].azure_secret}}" + subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" + tenant: "{{cluster_vars[buildenv].azure_tenant}}" + resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" + name: "{{ item.networkInterface | basename }}" + with_items: "{{ r__azure_rm_resource_info.results | json_query(\"[].{name: item.name, regionzone: join('-',[item.location,response[0].zones[0]]), tagslabels: item.tags, instance_id: item.id, instance_state: item.power_state, networkInterface: response[0].properties.networkProfile.networkInterfaces[0].id }\") }}" + register: r__azure_rm_networkinterface_info + delegate_to: localhost + run_once: true + async: 7200 + poll: 0 + +- name: get_cluster_hosts_state/azure | Wait for network interface info + async_status: { jid: "{{ item.ansible_job_id }}" } + register: r__async_status__azure_rm_networkinterface_info + until: r__async_status__azure_rm_networkinterface_info.finished + delay: 3 + retries: 300 + with_items: "{{r__azure_rm_networkinterface_info.results}}" delegate_to: localhost run_once: true +#- name: get_cluster_hosts_state/azure | r__async_status__azure_rm_networkinterface_info +# debug: msg="{{r__async_status__azure_rm_networkinterface_info}}" +# delegate_to: localhost +# run_once: true + + +- name: get_cluster_hosts_state/azure | Get publicipaddress info (per instance) + azure.azcollection.azure_rm_publicipaddress_info: + client_id: "{{cluster_vars[buildenv].azure_client_id}}" + secret: "{{cluster_vars[buildenv].azure_secret}}" + subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" + tenant: "{{cluster_vars[buildenv].azure_tenant}}" + resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" + name: "{{ item.networkinterfaces[0].ip_configurations[0].public_ip_address | basename }}" + with_items: "{{ r__async_status__azure_rm_networkinterface_info.results }}" + register: r__azure_rm_publicipaddress_info + delegate_to: localhost + run_once: true + async: 7200 + poll: 0 + +- name: get_cluster_hosts_state/azure | Wait for publicipaddress info + async_status: { jid: "{{ item.ansible_job_id }}" 
} + register: r__async_status__azure_rm_publicipaddress_info + until: r__async_status__azure_rm_publicipaddress_info.finished + delay: 3 + retries: 300 + with_items: "{{r__azure_rm_publicipaddress_info.results}}" + delegate_to: localhost + run_once: true + +#- name: get_cluster_hosts_state/azure | r__async_status__azure_rm_publicipaddress_info +# debug: msg="{{r__async_status__azure_rm_publicipaddress_info}}" +# delegate_to: localhost +# run_once: true + - name: get_cluster_hosts_state/azure | Set cluster_hosts_state set_fact: - cluster_hosts_state: "{{r__azure_rm_resource_info.results | json_query(\"[].{name: response[0].name, regionzone: join('-',[response[0].location,response[0].zones[0]]), tagslabels: response[0].tags, instance_id: response[0].id, instance_state: item.power_state}\") }}" + cluster_hosts_state: "{{r__async_status__azure_rm_publicipaddress_info.results | json_query(\"[].{name: item.item.item.item.name, regionzone: item.item.item.item.regionzone, tagslabels: item.item.item.item.tagslabels, instance_id: item.item.item.item.instance_id, instance_state: item.item.item.item.instance_state, ipv4: {private: item.item.networkinterfaces[0].ip_configurations[0].private_ip_address, public: publicipaddresses[0].ip_address} }\") }}" diff --git a/cluster_hosts/tasks/get_cluster_hosts_state_esxifree.yml b/cluster_hosts/tasks/get_cluster_hosts_state_esxifree.yml index e39b2ae8..dfe18025 100644 --- a/cluster_hosts/tasks/get_cluster_hosts_state_esxifree.yml +++ b/cluster_hosts/tasks/get_cluster_hosts_state_esxifree.yml @@ -40,4 +40,4 @@ - name: get_cluster_hosts_state/esxifree | Set cluster_hosts_state set_fact: - cluster_hosts_state: "{{ r__vmware_guest_info.results | json_query(\"[].{name: instance.hw_name, regionzone: None, tagslabels: instance.annotation, instance_id: instance.moid, instance_state: instance.hw_power_status}\") }}" + cluster_hosts_state: "{{ r__vmware_guest_info.results | json_query(\"[].{name: instance.hw_name, regionzone: None, tagslabels: instance.annotation, instance_id: instance.moid, instance_state: instance.hw_power_status, ipv4: {private: item.ip_address, public: null} }\") }}" diff --git a/cluster_hosts/tasks/get_cluster_hosts_state_gcp.yml b/cluster_hosts/tasks/get_cluster_hosts_state_gcp.yml index 0af1caea..fc6a1083 100644 --- a/cluster_hosts/tasks/get_cluster_hosts_state_gcp.yml +++ b/cluster_hosts/tasks/get_cluster_hosts_state_gcp.yml @@ -23,4 +23,4 @@ {%- endfor -%} {{ res }} vars: - _cluster_hosts_state__urlregion: "{{r__gcp_compute_instance_info.results | json_query(\"[?resources[?labels]].resources[].{name: name, regionzone: zone, tagslabels: labels, instance_id: id, instance_state: status}\") }}" + _cluster_hosts_state__urlregion: "{{r__gcp_compute_instance_info.results | json_query(\"[?resources[?labels]].resources[].{name: name, regionzone: zone, tagslabels: labels, instance_id: id, instance_state: status, ipv4: {private: networkInterfaces[0].networkIP, public: networkInterfaces[0].accessConfigs[0].natIP} }\") }}" diff --git a/dynamic_inventory/tasks/aws.yml b/dynamic_inventory/tasks/aws.yml deleted file mode 100644 index 38713bdd..00000000 --- a/dynamic_inventory/tasks/aws.yml +++ /dev/null @@ -1,23 +0,0 @@ ---- - -- name: dynamic_inventory/aws | Get AWS instance facts - ec2_instance_info: - filters: - "tag:cluster_name": "{{cluster_name}}" - "instance-state-name": ["running"] - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{cluster_vars.region}}" - register: 
r__ec2_instance_info - delegate_to: localhost - -#- debug: msg={{r__ec2_instance_info}} - -- name: dynamic_inventory/aws | Set dynamic_inventory_flat - set_fact: - dynamic_inventory_flat: | - {%- if cluster_vars.inventory_ip == 'private' -%} - {{ r__ec2_instance_info.instances | json_query('[*].{hosttype: tags.hosttype, hostname: tags.Name, private_ip: private_ip_address, public_ip: public_ip_address, inventory_ip: private_ip_address, regionzone: placement.availability_zone}') }} - {%- else -%} - {{ r__ec2_instance_info.instances | json_query('[*].{hosttype: tags.hosttype, hostname: tags.Name, private_ip: private_ip_address, public_ip: public_ip_address, inventory_ip: public_ip_address regionzone: placement.availability_zone}') }} - {%- endif -%} \ No newline at end of file diff --git a/dynamic_inventory/tasks/azure.yml b/dynamic_inventory/tasks/azure.yml deleted file mode 100644 index 53051335..00000000 --- a/dynamic_inventory/tasks/azure.yml +++ /dev/null @@ -1,123 +0,0 @@ ---- - -# Note: Azure, irritatingly, doesn't provide all the info we need for cluster_hosts_target in one place. We have to run each of these, passing the results of the previous into the next. -# + VM info: azure_rm_virtualmachine_info -# + VM AZ info: azure_rm_resource_info -# + Private IP info: azure_rm_networkinterface_info -# + Public IP info: azure_rm_publicipaddress_info - -- name: dynamic_inventory/azure | Get instance info - azure.azcollection.azure_rm_virtualmachine_info: - client_id: "{{cluster_vars[buildenv].azure_client_id}}" - secret: "{{cluster_vars[buildenv].azure_secret}}" - subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" - tenant: "{{cluster_vars[buildenv].azure_tenant}}" - resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" - tags: - - "cluster_name:{{cluster_name}}" - register: r__azure_rm_virtualmachine_info - delegate_to: localhost - run_once: true - -#- name: dynamic_inventory/azure | r__azure_rm_virtualmachine_info -# debug: msg="{{r__azure_rm_virtualmachine_info}}" -# delegate_to: localhost -# run_once: true - -- name: dynamic_inventory/azure | Get instance resource info (for VM AZ info) - azure.azcollection.azure_rm_resource_info: - client_id: "{{cluster_vars[buildenv].azure_client_id}}" - secret: "{{cluster_vars[buildenv].azure_secret}}" - subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" - tenant: "{{cluster_vars[buildenv].azure_tenant}}" - resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" - resource_name: "{{ item.name }}" - resource_type: VirtualMachines - provider: Compute - with_items: "{{ r__azure_rm_virtualmachine_info.vms | json_query(\"[?power_state=='running']\") }}" - register: r__azure_rm_resource_info - delegate_to: localhost - run_once: true - async: 7200 - poll: 0 - -- name: dynamic_inventory/azure | Wait for instance resource info (to get Zone info) - async_status: { jid: "{{ item.ansible_job_id }}" } - register: r__async_status__azure_rm_resource_info - until: r__async_status__azure_rm_resource_info.finished - delay: 3 - retries: 300 - with_items: "{{r__azure_rm_resource_info.results}}" - -#- name: dynamic_inventory/azure | r__async_status__azure_rm_resource_info -# debug: msg="{{r__async_status__azure_rm_resource_info}}" -# delegate_to: localhost -# run_once: true - - -- name: dynamic_inventory/azure | Get network interface info (per instance) - azure.azcollection.azure_rm_networkinterface_info: - client_id: "{{cluster_vars[buildenv].azure_client_id}}" - secret: "{{cluster_vars[buildenv].azure_secret}}" - 
subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" - tenant: "{{cluster_vars[buildenv].azure_tenant}}" - resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" - name: "{{ item.networkInterface | basename }}" - with_items: "{{ r__async_status__azure_rm_resource_info.results | json_query(\"[].response[0].{hosttype: tags.hosttype, hostname: name, networkInterface: properties.networkProfile.networkInterfaces[0].id, regionzone: join('-',[location,zones[0]])}\") }}" - register: r__azure_rm_networkinterface_info - delegate_to: localhost - run_once: true - async: 7200 - poll: 0 - -- name: dynamic_inventory/azure | Wait for network interface info - async_status: { jid: "{{ item.ansible_job_id }}" } - register: r__async_status__azure_rm_networkinterface_info - until: r__async_status__azure_rm_networkinterface_info.finished - delay: 3 - retries: 300 - with_items: "{{r__azure_rm_networkinterface_info.results}}" - -#- name: dynamic_inventory/azure | r__async_status__azure_rm_networkinterface_info -# debug: msg="{{r__async_status__azure_rm_networkinterface_info}}" -# delegate_to: localhost -# run_once: true - - -- name: dynamic_inventory/azure | Get publicipaddress info (per instance) - azure.azcollection.azure_rm_publicipaddress_info: - client_id: "{{cluster_vars[buildenv].azure_client_id}}" - secret: "{{cluster_vars[buildenv].azure_secret}}" - subscription_id: "{{cluster_vars[buildenv].azure_subscription_id}}" - tenant: "{{cluster_vars[buildenv].azure_tenant}}" - resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" - name: "{{ item.public_ip_id | basename }}" - with_items: "{{ r__async_status__azure_rm_networkinterface_info.results | json_query(\"[].{hosttype: item.item.hosttype, hostname: item.item.hostname, regionzone: item.item.regionzone, private_ip: networkinterfaces[].ip_configurations[0].private_ip_address|[0], public_ip_id: networkinterfaces[].ip_configurations[0].public_ip_address|[0]}\") }}" - register: r__azure_rm_networkinterface_info - delegate_to: localhost - run_once: true - async: 7200 - poll: 0 - -- name: dynamic_inventory/azure | Wait for publicipaddress info - async_status: { jid: "{{ item.ansible_job_id }}" } - register: r__async_status__azure_rm_publicipaddress_info - until: r__async_status__azure_rm_publicipaddress_info.finished - delay: 3 - retries: 300 - with_items: "{{r__azure_rm_networkinterface_info.results}}" - -#- name: dynamic_inventory/azure | r__async_status__azure_rm_publicipaddress_info -# debug: msg="{{r__async_status__azure_rm_publicipaddress_info}}" -# delegate_to: localhost -# run_once: true - -- name: dynamic_inventory/aws | Set dynamic_inventory_flat - set_fact: - dynamic_inventory_flat: | - {%- if cluster_vars.inventory_ip == 'private' -%} - {{ r__async_status__azure_rm_publicipaddress_info.results | json_query('[].{hosttype: item.item.hosttype, hostname: item.item.hostname, regionzone: item.item.regionzone, private_ip: item.item.private_ip, public_ip: publicipaddresses[0].ip_address, inventory_ip: item.item.private_ip}') | default([]) }} - {%- else -%} - {{ r__async_status__azure_rm_publicipaddress_info.results | json_query('[].{hosttype: item.item.hosttype, hostname: item.item.hostname, regionzone: item.item.regionzone, private_ip: item.item.private_ip, public_ip: publicipaddresses[0].ip_address, inventory_ip: publicipaddresses[0].ip_address}') | default([]) }} - {%- endif -%} - diff --git a/dynamic_inventory/tasks/esxifree.yml b/dynamic_inventory/tasks/esxifree.yml deleted file mode 100644 index a7e0d4fb..00000000 
--- a/dynamic_inventory/tasks/esxifree.yml +++ /dev/null @@ -1,45 +0,0 @@ ---- - -- name: dynamic_inventory/esxifree | Get existing VMware instance info - vmware_vm_info: - username: "{{ cluster_vars.username }}" - password: "{{ cluster_vars.password }}" - hostname: "{{ cluster_vars.esxi_ip }}" - validate_certs: no - register: r__vmware_vm_info - delegate_to: localhost - run_once: true - -- name: dynamic_inventory/esxifree | Get existing VMware instance facts - vmware_guest_info: - username: "{{ cluster_vars.username }}" - password: "{{ cluster_vars.password }}" - hostname: "{{ cluster_vars.esxi_ip }}" - validate_certs: no - datacenter: None - uuid: "{{item.uuid}}" - with_items: "{{ r__vmware_vm_info.virtual_machines | to_json | from_json | json_query(\"[?starts_with(guest_name, '\"+cluster_name+\"') && power_state=='poweredOn']\") }}" - register: r__vmware_guest_info - delegate_to: localhost - run_once: true - -## esxifree hosts must use the esxi 'annotations' field as json. They are stored as unconventional text in the vmx file, so must -## be converted into inline-json within the facts. If the annotation field is not convertible to json, then we don't consider this VM part of the cluster. -- name: dynamic_inventory/esxifree | Update r__vmware_guest_info result with json-parsed annotations - set_fact: - r__vmware_guest_info: | - {% set res = {'results': []} -%} - {%- for result in r__vmware_guest_info.results -%} - {%- set loadloose_res = result.instance.annotation | json_loads_loose -%} - {%- if loadloose_res | type_debug == 'dict' or loadloose_res | type_debug == 'list' -%} - {%- set _ = result.instance.update({'annotation': loadloose_res}) -%} - {%- set _ = res.results.append(result) -%} - {%- endif -%} - {%- endfor -%} - {{ res }} - -#- debug: msg={{r__vmware_guest_info}} - -- name: dynamic_inventory/esxifree | Set dynamic_inventory_flat - set_fact: - dynamic_inventory_flat: "{{ r__vmware_guest_info.results | json_query(\"[].{hosttype: instance.annotation.hosttype, hostname: item.guest_name, private_ip: item.ip_address, inventory_ip: item.ip_address}\") | default([]) }}" diff --git a/dynamic_inventory/tasks/gcp.yml b/dynamic_inventory/tasks/gcp.yml deleted file mode 100644 index 4b5c46f0..00000000 --- a/dynamic_inventory/tasks/gcp.yml +++ /dev/null @@ -1,33 +0,0 @@ ---- - -# Note: 'scopes' comes from here (https://developers.google.com/identity/protocols/googlescopes#computev1) -- name: dynamic_inventory/gcp | Get GCP instance facts - gcp_compute_instance_info: - zone: "{{cluster_vars.region}}-{{item}}" - filters: - - "name = {{cluster_name}}*" - - "status = RUNNING" # gcloud compute instances list --filter="status=RUNNING" - project: "{{cluster_vars[buildenv].vpc_project_id}}" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - scopes: ["https://www.googleapis.com/auth/compute.readonly"] - with_items: "{{ cluster_vars[buildenv].hosttype_vars | json_query(\"*[vms_by_az][][keys(@)][][]\") | unique }}" - register: r__gcp_compute_instance_info - delegate_to: localhost - run_once: true - -#- debug: msg={{r__gcp_compute_instance_info}} - -- name: dynamic_inventory/gcp | Set dynamic_inventory_flat - set_fact: - dynamic_inventory_flat: | - {% set res = _dynamic_inventory_flat__urlregion -%} - {%- for cluster_var in res -%}{%- set _ = cluster_var.update({'regionzone': cluster_var.regionzone | regex_replace('^.*/(.*)$', '\\1') }) -%}{%- endfor -%} - {{ res }} - vars: - _dynamic_inventory_flat__urlregion: | - {%- if cluster_vars.inventory_ip == 'private' -%} - {{ 
r__gcp_compute_instance_info.results | json_query('[*].resources[].{hosttype: labels.hosttype, hostname: name, private_ip: networkInterfaces[0].networkIP, public_ip: networkInterfaces[0].accessConfigs[0].natIP, inventory_ip: networkInterfaces[0].networkIP, regionzone: zone}') }} - {%- else -%} - {{ r__gcp_compute_instance_info.results | json_query('[*].resources[].{hosttype: labels.hosttype, hostname: name, private_ip: networkInterfaces[0].networkIP, public_ip: networkInterfaces[0].accessConfigs[0].natIP, inventory_ip: networkInterfaces[0].accessConfigs[0].natIP, regionzone: zone}') }} - {%- endif -%} diff --git a/dynamic_inventory/tasks/main.yml b/dynamic_inventory/tasks/main.yml index d1c57299..4780028a 100644 --- a/dynamic_inventory/tasks/main.yml +++ b/dynamic_inventory/tasks/main.yml @@ -1,30 +1,33 @@ --- -- name: "dynamic_inventory | Derive dynamic inventory for {{cluster_vars.type}} cluster" - include_tasks: "{{cluster_vars.type}}.yml" +- name: "dynamic_inventory | Get cluster_hosts_state for {{cluster_vars.type}} cluster" + include_role: + name: 'clusterverse/cluster_hosts' + tasks_from: "get_cluster_hosts_state_{{cluster_vars.type}}.yml" -- assert: { that: "dynamic_inventory_flat is defined", msg: "dynamic_inventory_flat is not defined" } +- name: dynamic_inventory | assert that cluster_hosts_state is defined + assert: { that: "cluster_hosts_state is defined", msg: "cluster_hosts_state is not defined" } -- name: dynamic_inventory | dynamic_inventory_flat - debug: msg="{{dynamic_inventory_flat}}" +- name: dynamic_inventory | cluster_hosts_state + debug: msg="{{cluster_hosts_state}}" -- name: dynamic_inventory | Refresh (clean it, because there is no file or plugin inventory defined) the in-memory inventory prior to building it (this is in case this module is called multiple times, and we otherwise only add hosts to existing inventory) +- name: dynamic_inventory | Refresh the in-memory inventory prior to building it (in this case, empties it, because there is no file or plugin inventory defined). This is in case this module is called multiple times, and we otherwise only add hosts to existing inventory. 
meta: refresh_inventory -- name: dynamic_inventory | get (only network) facts - to determine the local IP/network +- name: dynamic_inventory | Get (network) facts - to determine the local IP/network, to see if we need the bastion below (requires the 'ip' tool (the 'iproute2' package on Ubuntu)) setup: { gather_subset: ["network"] } - name: dynamic_inventory | Add hosts to dynamic inventory add_host: - name: "{{ item.hostname }}" - groups: "{{ item.hosttype }},{{ cluster_name }},{{ clusterid }}{% if 'regionzone' in item %},{{ item.regionzone }}{% endif %}" - ansible_host: "{{ item.inventory_ip }}" - hosttype: "{{ item.hosttype }}" - regionzone: "{{ item.regionzone | default(omit) }}" + name: "{{ item.name }}" + groups: "{{ item.tagslabels.hosttype }},{{ cluster_name }},{{ clusterid }}{% if item.regionzone is defined and item.regionzone %},{{ item.regionzone }}{% endif %}" + ansible_host: "{{ item.ipv4.public if cluster_vars.inventory_ip=='public' else item.ipv4.private }}" + hosttype: "{{ item.tagslabels.hosttype }}" + regionzone: "{{ item.regionzone if item.regionzone else omit }}" ansible_ssh_common_args: "{{ cluster_vars[buildenv].ssh_connection_cfg.bastion.ssh_args if (_bastion_host and (not _bastion_in_host_net or (force_use_bastion is defined and force_use_bastion|bool))) else (omit) }}" # Don't use the bastion if we're running in the same subnet (assumes all hosts in subnet can operate as a bastion), or if the user sets '-e force_use_bastion=true' ansible_user: "{{ cluster_vars[buildenv].ssh_connection_cfg.host.ansible_user | default(omit) }}" ansible_ssh_private_key_file: "{{ cluster_vars[buildenv].ssh_connection_cfg.host.ansible_ssh_private_key_file | default(None) | ternary('id_rsa_ansible_ssh_private_key_file', omit) }}" - with_items: "{{ dynamic_inventory_flat }}" + with_items: "{{ cluster_hosts_state | json_query(\"[?contains('RUNNING,running,poweredOn', instance_state)]\") }}" vars: _local_cidr: "{{ (ansible_default_ipv4.network+'/'+ansible_default_ipv4.netmask) | ipaddr('network/prefix') }}" # Get the network the localhost IP is in _bastion_host: "{{ cluster_vars[buildenv].ssh_connection_cfg.bastion.ssh_args | default() | regex_replace('.*@([]\\w\\d\\.-]*).*', '\\1') }}" # Extract just the bastion hostname from 'cluster_vars[buildenv].ssh_connection_cfg.bastion.ssh_args' @@ -39,10 +42,10 @@ - name: dynamic_inventory | Populate inventory file from dynamic inventory copy: content: | - {% for groupname in groups.keys() -%} + {% for groupname in groups.keys() | sort() -%} {% if groupname not in ["all", "ungrouped"] -%} [{{ groupname }}] - {% for hostname in groups[groupname] %} + {% for hostname in groups[groupname] | sort() %} {{ hostname }} ansible_host={{hostvars[hostname].ansible_host}} hosttype={{ hostvars[hostname].hosttype }} {% if 'ansible_user' in hostvars[hostname] %}ansible_user='{{ hostvars[hostname].ansible_user }}'{% endif %} {% if 'ansible_ssh_private_key_file' in hostvars[hostname] %}ansible_ssh_private_key_file='{{ hostvars[hostname].ansible_ssh_private_key_file }}'{% endif %} {% if 'regionzone' in hostvars[hostname] %}regionzone={{ hostvars[hostname].regionzone }}{% endif %} {% if 'ansible_ssh_common_args' in hostvars[hostname] %}ansible_ssh_common_args='{{ hostvars[hostname].ansible_ssh_common_args }}'{% endif %}{{''}} {% endfor %} diff --git a/readiness/tasks/remove_maintenance_mode_esxifree.yml b/readiness/tasks/remove_maintenance_mode_esxifree.yml index de1369eb..20a4d0e7 100644 --- a/readiness/tasks/remove_maintenance_mode_esxifree.yml +++ 
b/readiness/tasks/remove_maintenance_mode_esxifree.yml @@ -8,4 +8,4 @@ name: "{{item.name}}" state: "unchanged" annotation: "{{ item.tagslabels | combine({'maintenance_mode': 'false'}) }}" - with_items: "{{ cluster_hosts_state | json_query(\"[].instance_id\") }}" + with_items: "{{ cluster_hosts_state }}" From d78109c2dc6236930fde8e29e971cef6548436c6 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Sat, 27 Mar 2021 18:31:53 +0000 Subject: [PATCH 53/58] Add dynamic_inventory as a dependency of redeployment --- redeploy/meta/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/redeploy/meta/main.yml b/redeploy/meta/main.yml index 1773f5c9..089003f4 100644 --- a/redeploy/meta/main.yml +++ b/redeploy/meta/main.yml @@ -1,5 +1,5 @@ --- dependencies: - - role: '_dependencies' + - role: 'dynamic_inventory' - role: 'cluster_hosts' From 0507696e261e45d8bcd54b59fbd81a4d98085bdc Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Mon, 5 Apr 2021 17:07:35 +0100 Subject: [PATCH 54/58] Update esxifree_guest.py; add esxifree_guest_info + Update esxifree_guest - add esxifree_guest_info library. Replace python xml.dom.minidom with xmltodict to provide more precise parsing. + Require ansible 2.10.x for Azure (but <2.10.7 due to bug in AWS) + Add disk info to cluster_hosts_state to save cloud calls during rollback with _scheme_rmvm_keepdisk_rollback --- _dependencies/filter_plugins/custom.py | 22 +- _dependencies/library/esxifree_guest.py | 95 ++++--- _dependencies/library/esxifree_guest_LICENSE | 2 +- _dependencies/library/esxifree_guest_info.py | 260 ++++++++++++++++++ _dependencies/tasks/main.yml | 2 +- clean/tasks/dns.yml | 17 ++ .../tasks/get_cluster_hosts_state_aws.yml | 2 +- .../tasks/get_cluster_hosts_state_azure.yml | 2 +- .../get_cluster_hosts_state_esxifree.yml | 34 +-- .../tasks/get_cluster_hosts_state_gcp.yml | 2 +- ..._diskinfo_to_cluster_hosts_target__aws.yml | 25 +- ...info_to_cluster_hosts_target__esxifree.yml | 29 +- ..._diskinfo_to_cluster_hosts_target__gcp.yml | 33 +-- 13 files changed, 388 insertions(+), 137 deletions(-) create mode 100644 _dependencies/library/esxifree_guest_info.py diff --git a/_dependencies/filter_plugins/custom.py b/_dependencies/filter_plugins/custom.py index 0796adaa..cf7e996d 100644 --- a/_dependencies/filter_plugins/custom.py +++ b/_dependencies/filter_plugins/custom.py @@ -1,6 +1,8 @@ #!/usr/bin/env python from ansible.utils.display import Display +from ansible import constants as C +from ansible.module_utils._text import to_native, to_text display = Display() # display.v(u"json_loads_loose - input type: %s" % type(inStr)) @@ -35,24 +37,28 @@ def iplookup(fqdn): return fqdn else: import dns.resolver - return str(dns.resolver.query(fqdn, 'A')[0]) + return to_text(dns.resolver.query(fqdn, 'A')[0]) # Returns a json object from a loosely defined string (e.g. 
encoded using single quotes instead of double), or an object containing "AnsibleUnsafeText" def json_loads_loose(inStr): - import re, json + import re, json, sys - display.vvv(u"json_loads_loose - input type: %s" % type(inStr)) + display.vv(u"json_loads_loose - input type: %s; value %s" % (type(inStr), inStr)) if type(inStr) is dict or type(inStr) is list: - json_object = json.loads((str(json.dumps(inStr))).encode('utf-8')) + json_object = json.loads((to_text(json.dumps(inStr))).encode('utf-8')) else: try: json_object = json.loads(inStr) - except (ValueError, AttributeError) as e: + except (ValueError, AttributeError, TypeError) as e: try: - json_object = json.loads(str(re.sub(r'\'(.*?)\'([,:}])', r'"\1"\2', inStr).replace(': True', ': "True"').replace(': False', ': "False"')).encode('utf-8')) - except (ValueError, AttributeError) as e: - display.v(u"json_loads_loose - WARNING: could not parse attribute string as json: %s" % inStr) + json_object = json.loads(to_text(re.sub(r'\'(.*?)\'([,:}])', r'"\1"\2', inStr).replace(': True', ': "True"').replace(': False', ': "False"')).encode('utf-8')) + except (ValueError, AttributeError, TypeError) as e: + display.warning(u"json_loads_loose - WARNING: could not parse attribute string (%s) as json: %s" % (to_native(inStr), to_native(e))) return inStr + except: + e = sys.exc_info()[0] + display.warning(u"json_loads_loose - WARNING: could not parse attribute string (%s) as json: %s" % (to_native(inStr), to_native(e))) + return inStr return json_object diff --git a/_dependencies/library/esxifree_guest.py b/_dependencies/library/esxifree_guest.py index e62fb36b..e8315dbc 100644 --- a/_dependencies/library/esxifree_guest.py +++ b/_dependencies/library/esxifree_guest.py @@ -1,7 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Copyright 2020 Dougal Seeley +# Copyright (c) 2021, Dougal Seeley +# https://github.com/dseeley/esxifree_guest # BSD 3-Clause License from __future__ import absolute_import, division, print_function @@ -23,9 +24,10 @@ requirements: - python >= 2.7 - paramiko +- xmltodict notes: - Please make sure that the user used for esxifree_guest should have correct level of privileges. 
- - Tested on vSphere 6.7 + - Tested on vSphere 7.0.2 options: hostname: description: @@ -343,6 +345,7 @@ import base64 import yaml import errno # For the python2.7 IOError, because FileNotFound is for python3 +import xmltodict # define a custom yaml representer to force quoted strings yaml.add_representer(str, lambda dumper, data: dumper.represent_scalar('tag:yaml.org,2002:str', data, style='"')) @@ -359,7 +362,6 @@ from cookielib import CookieJar from httplib import HTTPResponse import ssl -import xml.dom.minidom if sys.version_info[0] < 3: from io import BytesIO as StringIO @@ -381,7 +383,8 @@ def __init__(self, host, username, password): self.vmware_soap_session_cookie = None self.host = host response, cookies = self.send_req("<_this>ServiceInstance") - sessionManager_name = xml.dom.minidom.parseString(response.read()).getElementsByTagName("sessionManager")[0].firstChild.data + xmltodictresponse = xmltodict.parse(response.read()) + sessionManager_name = xmltodictresponse['soapenv:Envelope']['soapenv:Body']['RetrieveServiceContentResponse']['returnval']['sessionManager']['#text'] response, cookies = self.send_req("<_this>" + sessionManager_name + "" + username + "" + password + "") self.vmware_soap_session_cookie = cookies['vmware_soap_session'].value @@ -406,17 +409,16 @@ def wait_for_task(self, task, timeout=30): while time_s > 0: response, cookies = self.send_req('<_this type="PropertyCollector">ha-property-collectorTaskfalseinfo' + task + 'false') if isinstance(response, HTTPResponse) or isinstance(response, addinfourl): - xmldom = xml.dom.minidom.parseString(response.read()) - if len(xmldom.getElementsByTagName('state')): - if xmldom.getElementsByTagName('state')[0].firstChild.data == 'success': - response = xmldom.getElementsByTagName('state')[0].firstChild.data - break - elif xmldom.getElementsByTagName('state')[0].firstChild.data == 'error': - response = str(xmldom.toxml()) - break - else: + xmltodictresponse = xmltodict.parse(response.read()) + if xmltodictresponse['soapenv:Envelope']['soapenv:Body']['RetrievePropertiesResponse']['returnval']['propSet']['val'] == 'running': time.sleep(1) time_s = time_s - 1 + elif xmltodictresponse['soapenv:Envelope']['soapenv:Body']['RetrievePropertiesResponse']['returnval']['propSet']['val']['state'] == 'success': + response = xmltodictresponse['soapenv:Envelope']['soapenv:Body']['RetrievePropertiesResponse']['returnval']['propSet']['val']['state'] + break + elif xmltodictresponse['soapenv:Envelope']['soapenv:Body']['RetrievePropertiesResponse']['returnval']['propSet']['val']['state'] == 'error': + response = str(xmltodictresponse) + break else: break return response @@ -623,9 +625,8 @@ def create_vm(self, vmTemplate=None, annotation=None, datastore=None, hardware=N ### Disk cloning - clone all disks from source response, cookies = self.soap_client.send_req('<_this type="PropertyCollector">ha-property-collectorVirtualMachinefalselayout' + str(template_moid) + 'false') - xmldom = xml.dom.minidom.parseString(response.read()) - srcDiskFiles = [data.firstChild.data for data in xmldom.getElementsByTagName("diskFile")] - + xmltodictresponse = xmltodict.parse(response.read(), force_list='disk') + srcDiskFiles = [disk.get('diskFile') for disk in xmltodictresponse['soapenv:Envelope']['soapenv:Body']['RetrievePropertiesExResponse']['returnval']['objects']['propSet']['val']['disk']] for srcDiskFile in srcDiskFiles: srcDiskFileInfo = re.search('^\[(?P.*?)\] *(?P.*\/(?P(?P.*?)(?:--(?P.*?))?\.vmdk))$', srcDiskFile) diskTypeKey = next((key for key, val in 
template_vmxDict.items() if val == srcDiskFileInfo.group('filepath')), None) @@ -774,22 +775,22 @@ def update_vm(self, annotation, disks): if annotation: # Update the config (annotation) in the running VM response, cookies = self.soap_client.send_req('<_this type="VirtualMachine">' + str(self.moid) + '' + annotation + '') - waitresp = self.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data) + waitresp = self.soap_client.wait_for_task(xmltodict.parse(response.read())['soapenv:Envelope']['soapenv:Body']['ReconfigVM_TaskResponse']['returnval']['#text']) if waitresp != 'success': return ("Failed to ReconfigVM_Task: %s" % waitresp) - # Now update the disk (should not be necessary, but for some reason, sometimes the ReconfigVM_Task does not flush config to disk). + # Now update the vmxFile on disk (should not be necessary, but for some reason, sometimes the ReconfigVM_Task does not flush config to disk). vmxDict.update({"annotation": annotation}) if disks: curDisks = [{"filename": vmxDict[scsiDisk], "volname": re.sub(r".*--([\w\d]+)\.vmdk", r"\1", vmxDict[scsiDisk])} for scsiDisk in sorted(vmxDict) if re.match(r"scsi0:\d\.filename", scsiDisk)] curDisksCount = len(curDisks) newDisks = [newDisk for newDisk in disks if ('boot' not in newDisk or newDisk['boot'] == False)] - for newDiskCount,newDisk in enumerate(newDisks): + for newDiskCount, newDisk in enumerate(newDisks): scsiDiskIdx = newDiskCount + curDisksCount disk_filename = self.name + "--" + newDisk['volname'] + ".vmdk" - #Don't clone already existing disks + # Don't clone already-existing disks try: (stdin, stdout, stderr) = self.esxiCnx.exec_command("stat " + os.path.dirname(vmxPath) + "/" + disk_filename) except IOError as e: @@ -809,7 +810,7 @@ def update_vm(self, annotation, disks): (stdin, stdout, stderr) = self.esxiCnx.exec_command("vmkfstools -c " + str(newDisk['size_gb']) + "G -d " + newDisk['type'] + " " + os.path.dirname(vmxPath) + "/" + disk_filename) # if this is a new disk, not a restatement of an existing disk: - if len(curDisks) >= newDiskCount+2 and curDisks[newDiskCount+1]['volname'] == newDisk['volname']: + if len(curDisks) >= newDiskCount + 2 and curDisks[newDiskCount + 1]['volname'] == newDisk['volname']: pass else: vmxDict.update({"scsi0:" + str(scsiDiskIdx) + ".devicetype": "scsi-hardDisk"}) @@ -909,23 +910,37 @@ class cDummyAnsibleModule(): "cloudinit_userdata": [], "customvalues": [], "datastore": "4tb-evo860-ssd", + "disks": [], # "disks": [{"size_gb": 1, "type": "thin", "volname": "test"}], - "disks": [{"size_gb": 1, "type": "thin", "volname": "test", "src": {"backing_filename": "[4tb-evo860-ssd] testdisks-dev-sys-a0-1601204786/testdisks-dev-sys-a0-1601204786--test.vmdk", "copy_or_move": "move"}}], + # "disks": [{"size_gb": 1, "type": "thin", "volname": "test", "src": {"backing_filename": "[4tb-evo860-ssd] testdisks-dev-sys-a0-1601204786/testdisks-dev-sys-a0-1601204786--test.vmdk", "copy_or_move": "move"}}], "force": False, "guest_id": "ubuntu-64", "hardware": {"memory_mb": "2048", "num_cpus": "2", "version": "15"}, "hostname": "192.168.1.3", "moid": None, - "name": "testdisks-dev-sys-a0-1601205102", + "name": "dougal-test-dev-sys-a0-new", "networks": [{"cloudinit_netplan": {"ethernets": {"eth0": {"dhcp4": True}}}, "networkName": "VM Network", "virtualDev": "vmxnet3"}], "password": sys.argv[2], "state": "present", - "template": "gold-ubuntu2004-20200912150257", + "template": "dougal-test-dev-sys-a0-1617553110", "username": "svc", "wait": True, 
"wait_timeout": 180 } + # ## Poweroff VM + # params = { + # # "annotation": "{'Name': 'dougal-test-dev-sysdisks2-a0-1617548508', 'hosttype': 'sysdisks2', 'env': 'dev', 'cluster_name': 'dougal-test-dev', 'owner': 'dougal', 'cluster_suffix': '1617548508', 'lifecycle_state': 'retiring', 'maintenance_mode': 'false'}", + # "disks": None, + # "hostname": "192.168.1.3", + # "name": "dougal-test-dev-sysdisks2-a0-1617548508", + # "moid": None, + # "password": sys.argv[2], + # "state": "poweredoff", + # "username": "svc", + # "wait_timeout": 180 + # } + ## Delete VM # params = { # "hostname": "192.168.1.3", @@ -984,7 +999,7 @@ def fail_json(self, msg): (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/power.getstate " + str(iScraper.moid)) if re.search('Powered off', stdout.read().decode('UTF-8')) is not None: response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') - if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': + if iScraper.soap_client.wait_for_task(xmltodict.parse(response.read())['soapenv:Envelope']['soapenv:Body']['PowerOnVM_TaskResponse']['returnval']['#text'], int(module.params['wait_timeout'])) != 'success': module.fail_json(msg="Failed to PowerOnVM_Task") module.exit_json(changed=True, meta={"msg": "Powered-on " + iScraper.name + ": " + str(iScraper.moid)}) else: @@ -997,7 +1012,7 @@ def fail_json(self, msg): (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/power.getstate " + str(iScraper.moid)) if re.search('Powered on', stdout.read().decode('UTF-8')) is not None: response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') - if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': + if iScraper.soap_client.wait_for_task(xmltodict.parse(response.read())['soapenv:Envelope']['soapenv:Body']['PowerOffVM_TaskResponse']['returnval']['#text'], int(module.params['wait_timeout'])) != 'success': module.fail_json(msg="Failed to PowerOffVM_Task") module.exit_json(changed=True, meta={"msg": "Powered-off " + iScraper.name + ": " + str(iScraper.moid)}) else: @@ -1009,10 +1024,10 @@ def fail_json(self, msg): if iScraper.moid: # Turn off (ignoring failures), then destroy response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') - iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) + iScraper.soap_client.wait_for_task(xmltodict.parse(response.read())['soapenv:Envelope']['soapenv:Body']['PowerOffVM_TaskResponse']['returnval']['#text'], int(module.params['wait_timeout'])) response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') - if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': + if iScraper.soap_client.wait_for_task(xmltodict.parse(response.read())['soapenv:Envelope']['soapenv:Body']['Destroy_TaskResponse']['returnval']['#text'], int(module.params['wait_timeout'])) != 'success': module.fail_json(msg="Failed to Destroy_Task") 
module.exit_json(changed=True, meta={"msg": "Deleted " + iScraper.name + ": " + str(iScraper.moid)}) else: @@ -1023,12 +1038,10 @@ def fail_json(self, msg): (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/power.getstate " + str(iScraper.moid)) if re.search('Powered off', stdout.read().decode('UTF-8')) is not None: response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') - if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': + if iScraper.soap_client.wait_for_task(xmltodict.parse(response.read())['soapenv:Envelope']['soapenv:Body']['PowerOffVM_TaskResponse']['returnval']['#text'], int(module.params['wait_timeout'])) != 'success': module.fail_json(msg="Failed to PowerOnVM_Task") else: response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') - if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': - module.fail_json(msg="Failed to RebootGuest") module.exit_json(changed=True, meta={"msg": "Rebooted " + iScraper.name + ": " + str(iScraper.moid)}) else: module.fail_json(msg="VM doesn't exist.") @@ -1039,18 +1052,26 @@ def fail_json(self, msg): if iScraper.moid and module.params['force']: # Turn off (ignoring failures), then destroy response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') - iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) + if iScraper.soap_client.wait_for_task(xmltodict.parse(response.read())['soapenv:Envelope']['soapenv:Body']['PowerOffVM_TaskResponse']['returnval']['#text'], int(module.params['wait_timeout'])) != 'success': + module.fail_json(msg="Failed to PowerOffVM_Task (prior to Destroy_Task") response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') - if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': + if iScraper.soap_client.wait_for_task(xmltodict.parse(response.read())['soapenv:Envelope']['soapenv:Body']['Destroy_TaskResponse']['returnval']['#text'], int(module.params['wait_timeout'])) != 'success': module.fail_json(msg="Failed to Destroy_Task") iScraper.moid = None # If the VM doesn't exist, create it. if iScraper.moid is None: - createVmResult = iScraper.create_vm(module.params['template'], module.params['annotation'], module.params['datastore'], module.params['hardware'], module.params['guest_id'], module.params['disks'], module.params['cdrom'], module.params['customvalues'], module.params['networks'], module.params['cloudinit_userdata']) - if createVmResult != None: - module.fail_json(msg="Failed to create_vm: %s" % createVmResult) + # If we're cloning, ensure template VM is powered off. 
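+        # The power state is checked via vim-cmd before cloning, because the clone path copies the
+        # template's .vmdk files on the datastore, which is assumed to be unreliable (locked or
+        # inconsistent disks) while the template VM is running.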
+ if module.params['template'] is not None: + iScraperTemplate = esxiFreeScraper(hostname=module.params['hostname'], username=module.params['username'], password=module.params['password'], name=module.params['template'], moid=None) + (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/power.getstate " + str(iScraperTemplate.moid)) + if re.search('Powered off', stdout.read().decode('UTF-8')) is not None: + createVmResult = iScraper.create_vm(module.params['template'], module.params['annotation'], module.params['datastore'], module.params['hardware'], module.params['guest_id'], module.params['disks'], module.params['cdrom'], module.params['customvalues'], module.params['networks'], module.params['cloudinit_userdata']) + if createVmResult != None: + module.fail_json(msg="Failed to create_vm: %s" % createVmResult) + else: + module.fail_json(msg="Template VM must be powered off before cloning") else: updateVmResult = iScraper.update_vm(annotation=module.params['annotation'], disks=module.params['disks']) @@ -1060,7 +1081,7 @@ def fail_json(self, msg): (stdin, stdout, stderr) = iScraper.esxiCnx.exec_command("vim-cmd vmsvc/power.getstate " + str(iScraper.moid)) if re.search('Powered off', stdout.read().decode('UTF-8')) is not None: response, cookies = iScraper.soap_client.send_req('<_this type="VirtualMachine">' + str(iScraper.moid) + '') - if iScraper.soap_client.wait_for_task(xml.dom.minidom.parseString(response.read()).getElementsByTagName('returnval')[0].firstChild.data, int(module.params['wait_timeout'])) != 'success': + if iScraper.soap_client.wait_for_task(xmltodict.parse(response.read())['soapenv:Envelope']['soapenv:Body']['PowerOnVM_TaskResponse']['returnval']['#text'], int(module.params['wait_timeout'])) != 'success': module.fail_json(msg="Failed to PowerOnVM_Task") isChanged = True diff --git a/_dependencies/library/esxifree_guest_LICENSE b/_dependencies/library/esxifree_guest_LICENSE index 3c642ec5..7dce5362 100644 --- a/_dependencies/library/esxifree_guest_LICENSE +++ b/_dependencies/library/esxifree_guest_LICENSE @@ -1,6 +1,6 @@ BSD 3-Clause License -Copyright (c) 2020, Dougal Seeley +Copyright (c) 2021, Dougal Seeley All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/_dependencies/library/esxifree_guest_info.py b/_dependencies/library/esxifree_guest_info.py new file mode 100644 index 00000000..7f4e0c4f --- /dev/null +++ b/_dependencies/library/esxifree_guest_info.py @@ -0,0 +1,260 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright (c) 2021, Dougal Seeley +# https://github.com/dseeley/esxifree_guest +# BSD 3-Clause License + +from __future__ import absolute_import, division, print_function + +__metaclass__ = type + +ANSIBLE_METADATA = {'metadata_version': '1.1', 'status': ['preview'], 'supported_by': 'community'} + +DOCUMENTATION = r''' +--- +module: esxifree_guest_info +short_description: Retrieves virtual machine info in ESXi without a dependency on the vSphere/ vCenter API. +description: > + This module can be used to retrieve virtual machine info. When fetching all VM info, does so atomically (a single SOAP call), to prevent race conditions. +version_added: '2.9' +author: +- Dougal Seeley (ansible@dougalseeley.com) +requirements: +- python >= 2.7 +- xmltodict +notes: + - Please make sure that the user used for esxifree_guest should have correct level of privileges. + - Tested on vSphere 7.0.2 +options: + hostname: + description: + - The hostname or IP address of the ESXi server. 
+ required: true + type: str + username: + description: + - The username to access the ESXi server at C(hostname). + required: true + type: str + password: + description: + - The password of C(username) for the ESXi server, or the password for the private key (if required). + required: true + type: str + name: + description: + - Name of the virtual machine to work with (optional). + - Virtual machine names in ESXi are unique + - This parameter is case sensitive. + type: str + moid: + description: + - Managed Object ID of the virtual machine to manage + type: str +''' +EXAMPLES = r''' +- name: Get virtual machine for ALL VMs + esxifree_guest_info: + hostname: "192.168.1.3" + username: "svc" + password: "my_passsword" + delegate_to: localhost + +- name: Get virtual machine for specific VM + esxifree_guest_info: + hostname: "192.168.1.3" + username: "svc" + password: "my_passsword" + name: "my_vm" + delegate_to: localhost +''' + +RETURN = r''' +instance: + description: metadata about the virtual machine + returned: always + type: dict + sample: None +''' + +import json +import re +import sys +import xmltodict + +# For the soap client +try: + from urllib.request import Request, build_opener, HTTPSHandler, HTTPCookieProcessor + from urllib.response import addinfourl + from urllib.error import HTTPError + from http.cookiejar import CookieJar + from http.client import HTTPResponse +except ImportError: + from urllib2 import Request, build_opener, HTTPError, HTTPSHandler, HTTPCookieProcessor, addinfourl + from cookielib import CookieJar + from httplib import HTTPResponse +import ssl + +try: + from ansible.module_utils.basic import AnsibleModule +except: + pass + + +# Executes soap requests on the remote host. +class vmw_soap_client(object): + def __init__(self, host, username, password): + self.vmware_soap_session_cookie = None + self.host = host + response, cookies = self.send_req("<_this>ServiceInstance") + xmltodictresponse = xmltodict.parse(response.read()) + sessionManager_name = xmltodictresponse['soapenv:Envelope']['soapenv:Body']['RetrieveServiceContentResponse']['returnval']['sessionManager']['#text'] + + response, cookies = self.send_req("<_this>" + sessionManager_name + "" + username + "" + password + "") + self.vmware_soap_session_cookie = cookies['vmware_soap_session'].value + + def send_req(self, envelope_body=None): + envelope = '' + '' + str(envelope_body) + '' + cj = CookieJar() + req = Request( + url='https://' + self.host + '/sdk/vimService.wsdl', data=envelope.encode(), + headers={"Content-Type": "text/xml", "SOAPAction": "urn:vim25/6.7.3", "Accept": "*/*", "Cookie": "vmware_client=VMware; vmware_soap_session=" + str(self.vmware_soap_session_cookie)}) + + opener = build_opener(HTTPSHandler(context=ssl._create_unverified_context()), HTTPCookieProcessor(cj)) + try: + response = opener.open(req, timeout=30) + except HTTPError as err: + response = str(err) + cookies = {i.name: i for i in list(cj)} + return (response[0] if isinstance(response, list) else response, cookies) # If the cookiejar contained anything, we get a list of two responses + + +class esxiFreeScraper(object): + def __init__(self, hostname, username='root', password=None): + self.soap_client = vmw_soap_client(host=hostname, username=username, password=password) + + def get_vm_info(self, name=None, moid=None): + if moid: + response, cookies = self.soap_client.send_req('<_this type="PropertyCollector">ha-property-collectorVirtualMachinetrue' + str(moid) + 'false') + xmltodictresponse = xmltodict.parse(response.read()) 
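+            # RetrievePropertiesEx was filtered to a single moid above; walk the SOAP envelope down
+            # to its 'objects' entry and normalise it with parse_vm().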
+ return (self.parse_vm(xmltodictresponse['soapenv:Envelope']['soapenv:Body']['RetrievePropertiesExResponse']['returnval']['objects'])) + elif name: + virtual_machines = self.get_all_vm_info() + return ([vm for vm in virtual_machines if vm['hw_name'] == name][0]) + + def get_all_vm_info(self): + response, cookies = self.soap_client.send_req('<_this type="PropertyCollector">ha-property-collectorVirtualMachinefalsenameconfigconfigStatusdatastoreguestlayoutlayoutExruntimeha-folder-vmtraverseChildFolderchildEntity traverseChildDatacentervmFoldertraverseChild ') + xmltodictresponse = xmltodict.parse(response.read()) + + virtual_machines = [] + for vm_instance in xmltodictresponse['soapenv:Envelope']['soapenv:Body']['RetrievePropertiesExResponse']['returnval']['objects']: + virtual_machines.append(self.parse_vm(vm_instance)) + + return (virtual_machines) + + def _getObjSafe(self, inDict, *keys): + for key in keys: + try: + inDict = inDict[key] + except KeyError: + return None + return inDict + + def parse_vm(self, vmObj): + configObj = [propSetObj for propSetObj in vmObj['propSet'] if propSetObj['name'] == 'config'][0]['val'] + runtimeObj = [propSetObj for propSetObj in vmObj['propSet'] if propSetObj['name'] == 'runtime'][0]['val'] + guestObj = [propSetObj for propSetObj in vmObj['propSet'] if propSetObj['name'] == 'guest'][0]['val'] + layoutExObj = [propSetObj for propSetObj in vmObj['propSet'] if propSetObj['name'] == 'layoutEx'][0]['val'] + newObj = {} + + newObj.update({"advanced_settings": {advObj['key']: advObj['value'].get('#text') for advObj in configObj['extraConfig']}}) + newObj.update({"annotation": configObj['annotation']}) + newObj.update({"consolidationNeeded": runtimeObj['consolidationNeeded']}) + newObj.update({"guest_tools_status": guestObj['toolsRunningStatus']}) + newObj.update({"guest_tools_version": guestObj['toolsVersion'] if 'toolsVersion' in guestObj else None}) + newObj.update({"hw_cores_per_socket": configObj['hardware']['numCoresPerSocket']}) + newObj.update({"hw_datastores": [configObj['datastoreUrl']['name']]}) + newObj.update({"hw_files": [file.get('name') for file in layoutExObj['file'] if file.get('type') in ['config', 'nvram', 'diskDescriptor', 'snapshotList', 'log']]}) + newObj.update({"hw_guest_full_name": guestObj['guestFullName'] if 'guestFullName' in guestObj else None}) + newObj.update({"hw_guest_id": guestObj['guestId'] if 'guestId' in guestObj else None}) + newObj.update({"hw_is_template": configObj['template']}) + newObj.update({"hw_memtotal_mb": int(configObj['hardware']['memoryMB'])}) + newObj.update({"hw_name": [propSetObj for propSetObj in vmObj['propSet'] if propSetObj['name'] == 'name'][0]['val'].get('#text')}) + newObj.update({"hw_power_status": runtimeObj['powerState']}) + newObj.update({"hw_processor_count": int(configObj['hardware']['numCPU'])}) + newObj.update({"hw_product_uuid": configObj['uuid']}) + newObj.update({"hw_version": configObj['version']}) + newObj.update({"ipv4": guestObj['ipAddress'] if 'ipAddress' in guestObj else None}) + newObj.update({"moid": vmObj['obj'].get('#text')}) + + guest_disk_info = [] + for virtualDiskObj in [diskObj for diskObj in configObj['hardware']['device'] if diskObj['@xsi:type'] == 'VirtualDisk']: + guest_disk_info.append({ + "backing_datastore": re.sub(r'^\[(.*?)\].*$', r'\1', virtualDiskObj['backing']['fileName']), + "backing_disk_mode": virtualDiskObj['backing']['diskMode'], + "backing_diskmode": virtualDiskObj['backing']['diskMode'], + "backing_eagerlyscrub": self._getObjSafe(virtualDiskObj, 'backing', 
'eagerlyScrub'), + "backing_filename": virtualDiskObj['backing']['fileName'], + "backing_thinprovisioned": virtualDiskObj['backing']['thinProvisioned'], + "backing_type": re.sub(r'^VirtualDisk(.*?)BackingInfo$', r'\1', virtualDiskObj['backing']['@xsi:type']), + "backing_uuid": self._getObjSafe(virtualDiskObj, 'backing', 'uuid'), + "backing_writethrough": virtualDiskObj['backing']['writeThrough'], + "capacity_in_bytes": int(virtualDiskObj['capacityInBytes']), + "capacity_in_kb": int(virtualDiskObj['capacityInKB']), + "controller_key": virtualDiskObj['controllerKey'], + "controller_bus_number": [deviceObj['busNumber'] for deviceObj in configObj['hardware']['device'] if deviceObj['key'] == virtualDiskObj['controllerKey']][0], + "controller_type": [deviceObj['@xsi:type'] for deviceObj in configObj['hardware']['device'] if deviceObj['key'] == virtualDiskObj['controllerKey']][0], + "key": virtualDiskObj['key'], + "label": virtualDiskObj['deviceInfo']['label'], + "summary": virtualDiskObj['deviceInfo']['summary'], + "unit_number": int(virtualDiskObj['unitNumber']) + }) + newObj.update({"guest_disk_info": guest_disk_info}) + + return newObj + + +def main(): + argument_spec = { + "hostname": {"type": "str", "required": True}, + "username": {"type": "str", "required": True}, + "password": {"type": "str", "required": True}, + "name": {"type": "str"}, + "moid": {"type": "str"} + } + + if not (len(sys.argv) > 1 and sys.argv[1] == "console"): + module = AnsibleModule(argument_spec=argument_spec, supports_check_mode=True) + else: + # For testing without Ansible (e.g on Windows) + class cDummyAnsibleModule(): + params = { + "hostname": "192.168.1.3", + "username": "svc", + "password": sys.argv[2], + "name": None, # "parsnip-prod-sys-a0-1616868999", + "moid": None # 350 + } + + def exit_json(self, changed, **kwargs): + print(changed, json.dumps(kwargs, sort_keys=True, indent=2, separators=(',', ': '))) + + def fail_json(self, msg): + print("Failed: " + msg) + exit(1) + + module = cDummyAnsibleModule() + + iScraper = esxiFreeScraper(hostname=module.params['hostname'], username=module.params['username'], password=module.params['password']) + + if ("moid" in module.params and module.params['name']) or ("name" in module.params and module.params['moid']): + vm_info = iScraper.get_vm_info(name=module.params['name'], moid=module.params['moid']) + else: + vm_info = iScraper.get_all_vm_info() + + module.exit_json(changed=False, virtual_machines=vm_info) + + +if __name__ == '__main__': + main() diff --git a/_dependencies/tasks/main.yml b/_dependencies/tasks/main.yml index 14106173..3caa1de0 100644 --- a/_dependencies/tasks/main.yml +++ b/_dependencies/tasks/main.yml @@ -30,7 +30,7 @@ - name: Preflight check block: - - assert: { that: "ansible_version.full is version_compare('2.9', '>=')", fail_msg: "Ansible >=2.9 required." } + - assert: { that: "ansible_version.full is version_compare('2.10', '>=') and ansible_version.full is version_compare('2.10.6', '<=')", fail_msg: "2.10.6 >= Ansible >= 2.10 required." 
} #2.10.7 has issue with AWS DNS: https://github.com/ansible-collections/community.aws/issues/523 - assert: { that: "app_name is defined and app_name != ''", fail_msg: "Please define app_name" } - assert: { that: "app_class is defined and app_class != ''", fail_msg: "Please define app_class" } - assert: { that: "cluster_vars is defined", fail_msg: "Please define cluster_vars" } diff --git a/clean/tasks/dns.yml b/clean/tasks/dns.yml index 075e722a..333fe4d0 100644 --- a/clean/tasks/dns.yml +++ b/clean/tasks/dns.yml @@ -35,6 +35,21 @@ - name: clean/dns/route53 | Delete DNS entries block: +# - name: clean/dns/route53 | Get Zone +# route53_zone: +# aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" +# aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" +# zone: "{{cluster_vars.dns_nameserver_zone}}" +# register: r__route53_zone +# +# - name: clean/dns/route53 | Get A records +# route53_info: +# query: record_sets +# hosted_zone_id: "{{ r__route53_zone.zone_id }}" +# start_record_name: "{{item.name}}.{{cluster_vars.dns_user_domain}}" +# register: r__route53_info +# with_items: "{{ hosts_to_clean }}" + - name: clean/dns/route53 | Get A records route53: aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" @@ -47,6 +62,8 @@ register: r__route53_a with_items: "{{ hosts_to_clean }}" + - debug: msg={{r__route53_a}} + - name: clean/dns/route53 | Delete A records route53: aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" diff --git a/cluster_hosts/tasks/get_cluster_hosts_state_aws.yml b/cluster_hosts/tasks/get_cluster_hosts_state_aws.yml index f3546ebf..48bee157 100644 --- a/cluster_hosts/tasks/get_cluster_hosts_state_aws.yml +++ b/cluster_hosts/tasks/get_cluster_hosts_state_aws.yml @@ -14,4 +14,4 @@ - name: get_cluster_hosts_state/aws | Set cluster_hosts_state set_fact: - cluster_hosts_state: "{{r__ec2_instance_info.instances | json_query(\"[].{name: tags.Name, regionzone: placement.availability_zone, tagslabels: tags, instance_id: instance_id, instance_state: state.name, ipv4: {private: private_ip_address, public: public_ip_address} }\") }}" + cluster_hosts_state: "{{r__ec2_instance_info.instances | json_query(\"[].{name: tags.Name, regionzone: placement.availability_zone, tagslabels: tags, instance_id: instance_id, instance_state: state.name, ipv4: {private: private_ip_address, public: public_ip_address}, disk_info_cloud: block_device_mappings }\") }}" diff --git a/cluster_hosts/tasks/get_cluster_hosts_state_azure.yml b/cluster_hosts/tasks/get_cluster_hosts_state_azure.yml index 4a344c91..d8344c7d 100644 --- a/cluster_hosts/tasks/get_cluster_hosts_state_azure.yml +++ b/cluster_hosts/tasks/get_cluster_hosts_state_azure.yml @@ -55,7 +55,7 @@ tenant: "{{cluster_vars[buildenv].azure_tenant}}" resource_group: "{{cluster_vars[buildenv].azure_resource_group}}" name: "{{ item.networkInterface | basename }}" - with_items: "{{ r__azure_rm_resource_info.results | json_query(\"[].{name: item.name, regionzone: join('-',[item.location,response[0].zones[0]]), tagslabels: item.tags, instance_id: item.id, instance_state: item.power_state, networkInterface: response[0].properties.networkProfile.networkInterfaces[0].id }\") }}" + with_items: "{{ r__azure_rm_resource_info.results | to_json | from_json | json_query(\"[].{name: item.name, regionzone: join('-',[item.location,response[0].zones[0]]), tagslabels: item.tags, instance_id: item.id, instance_state: item.power_state, networkInterface: response[0].properties.networkProfile.networkInterfaces[0].id }\") }}" register: 
r__azure_rm_networkinterface_info delegate_to: localhost run_once: true diff --git a/cluster_hosts/tasks/get_cluster_hosts_state_esxifree.yml b/cluster_hosts/tasks/get_cluster_hosts_state_esxifree.yml index dfe18025..775be0a7 100644 --- a/cluster_hosts/tasks/get_cluster_hosts_state_esxifree.yml +++ b/cluster_hosts/tasks/get_cluster_hosts_state_esxifree.yml @@ -1,43 +1,29 @@ --- - name: get_cluster_hosts_state/esxifree | Get basic instance info of all vms - vmware_vm_info: + esxifree_guest_info: username: "{{ cluster_vars.username }}" password: "{{ cluster_vars.password }}" hostname: "{{ cluster_vars.esxi_ip }}" - validate_certs: no - register: r__vmware_vm_info - delegate_to: localhost - run_once: true - -- name: get_cluster_hosts_state/esxifree | Get detailed instance info of cluster_name VMs - vmware_guest_info: - username: "{{ cluster_vars.username }}" - password: "{{ cluster_vars.password }}" - hostname: "{{ cluster_vars.esxi_ip }}" - validate_certs: no - datacenter: None - uuid: "{{item.uuid}}" - with_items: "{{ r__vmware_vm_info.virtual_machines | to_json | from_json | json_query(\"[?starts_with(guest_name, '\"+cluster_name+\"')]\") }}" - register: r__vmware_guest_info + register: r__esxifree_guest_info delegate_to: localhost run_once: true ## esxifree hosts must use the esxi 'annotations' field as json. They are stored as unconventional text in the vmx file, so must ## be converted into inline-json within the facts. If the annotation field is not convertible to json, then we don't consider this VM part of the cluster. -- name: get_cluster_hosts_state/esxifree | update r__vmware_guest_info result with json-parsed annotations +- name: get_cluster_hosts_state/esxifree | update r__esxifree_guest_info result with json-parsed annotations set_fact: - r__vmware_guest_info: | - {% set res = {'results': []} -%} - {%- for result in r__vmware_guest_info.results -%} - {%- set loadloose_res = result.instance.annotation | json_loads_loose -%} + r__esxifree_guest_info: | + {% set res = {'virtual_machines': []} -%} + {%- for result in r__esxifree_guest_info.virtual_machines -%} + {%- set loadloose_res = result.annotation | json_loads_loose -%} {%- if loadloose_res | type_debug == 'dict' or loadloose_res | type_debug == 'list' -%} - {%- set _ = result.instance.update({'annotation': loadloose_res}) -%} - {%- set _ = res.results.append(result) -%} + {%- set _ = result.update({'annotation': loadloose_res}) -%} + {%- set _ = res.virtual_machines.append(result) -%} {%- endif -%} {%- endfor -%} {{ res }} - name: get_cluster_hosts_state/esxifree | Set cluster_hosts_state set_fact: - cluster_hosts_state: "{{ r__vmware_guest_info.results | json_query(\"[].{name: instance.hw_name, regionzone: None, tagslabels: instance.annotation, instance_id: instance.moid, instance_state: instance.hw_power_status, ipv4: {private: item.ip_address, public: null} }\") }}" + cluster_hosts_state: "{{ r__esxifree_guest_info.virtual_machines | json_query(\"[?annotation.cluster_name==`\" + cluster_name + \"`].{name: hw_name, regionzone: None, tagslabels: annotation, instance_id: moid, instance_state: hw_power_status, ipv4: {private: ipv4, public: null}, disk_info_cloud: guest_disk_info }\") }}" diff --git a/cluster_hosts/tasks/get_cluster_hosts_state_gcp.yml b/cluster_hosts/tasks/get_cluster_hosts_state_gcp.yml index fc6a1083..38ba26db 100644 --- a/cluster_hosts/tasks/get_cluster_hosts_state_gcp.yml +++ b/cluster_hosts/tasks/get_cluster_hosts_state_gcp.yml @@ -23,4 +23,4 @@ {%- endfor -%} {{ res }} vars: - 
_cluster_hosts_state__urlregion: "{{r__gcp_compute_instance_info.results | json_query(\"[?resources[?labels]].resources[].{name: name, regionzone: zone, tagslabels: labels, instance_id: id, instance_state: status, ipv4: {private: networkInterfaces[0].networkIP, public: networkInterfaces[0].accessConfigs[0].natIP} }\") }}" + _cluster_hosts_state__urlregion: "{{r__gcp_compute_instance_info.results | json_query(\"[?resources[?labels]].resources[].{name: name, regionzone: zone, tagslabels: labels, instance_id: id, instance_state: status, ipv4: {private: networkInterfaces[0].networkIP, public: networkInterfaces[0].accessConfigs[0].natIP}, disk_info_cloud: disks }\") }}" diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml index ed310e25..eb998768 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__aws.yml @@ -1,27 +1,14 @@ --- -- name: _get_diskinfo_aws | ec2_instance_info - ec2_instance_info: - filters: - "instance-state-name": ["running", "stopped"] - "tag:cluster_name": "{{cluster_name}}" - aws_access_key: "{{cluster_vars[buildenv].aws_access_key}}" - aws_secret_key: "{{cluster_vars[buildenv].aws_secret_key}}" - region: "{{cluster_vars.region}}" - register: r__ec2_instance_info - -- name: _get_diskinfo_aws | r__ec2_instance_info - debug: msg={{r__ec2_instance_info}} - -- name: _get_diskinfo_aws | augment cluster_hosts_target auto_volumes with source disk info +- name: _add_src_diskinfo_to_cluster_hosts_target/aws | augment cluster_hosts_target auto_volumes with source disk info set_fact: cluster_hosts_target: | {%- for cht_host in cluster_hosts_target -%} {%- for cht_autovol in cht_host.auto_volumes -%} - {%- for ec2_instance_info_result in r__ec2_instance_info.instances | selectattr('tags.lifecycle_state', '!=', 'current') -%} - {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == ec2_instance_info_result.tags.Name | regex_replace('-(?!.*-).*') -%} - {%- for chs_host_diskinfo in ec2_instance_info_result.block_device_mappings | selectattr('device_name', '==', cht_autovol.device_name) | selectattr('device_name', '!=', '/dev/sda1') -%} - {%- set _ = cht_autovol.update({'src': {'instance_id': ec2_instance_info_result.instance_id, 'device_name': chs_host_diskinfo.device_name, 'volume_id': chs_host_diskinfo.ebs.volume_id }}) -%} + {%- for chs_host in cluster_hosts_state | selectattr('tagslabels.lifecycle_state', '!=', 'current') -%} + {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == chs_host.tagslabels.Name | regex_replace('-(?!.*-).*') -%} + {%- for chs_host_diskinfo in chs_host.disk_info_cloud | selectattr('device_name', '==', cht_autovol.device_name) | selectattr('device_name', '!=', '/dev/sda1') -%} + {%- set _ = cht_autovol.update({'src': {'instance_id': chs_host.instance_id, 'device_name': chs_host_diskinfo.device_name, 'volume_id': chs_host_diskinfo.ebs.volume_id }}) -%} {%- endfor -%} {%- endif -%} {%- endfor -%} @@ -29,5 +16,5 @@ {%- endfor -%} {{cluster_hosts_target}} -- name: _get_diskinfo_aws | cluster_hosts_target +- name: _add_src_diskinfo_to_cluster_hosts_target/aws | cluster_hosts_target debug: msg={{cluster_hosts_target}} diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml 
b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml index 1717764d..85a07832 100644 --- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__esxifree.yml @@ -1,30 +1,19 @@ --- -- name: _get_diskinfo_esxifree | vmware_guest_disk_info - vmware_guest_disk_info: - username: "{{ cluster_vars.username }}" - password: "{{ cluster_vars.password }}" - hostname: "{{ cluster_vars.esxi_ip }}" - datacenter: ha-datacenter - validate_certs: no - name: "{{item.name}}" - with_items: "{{cluster_hosts_state}}" - register: r__vmware_guest_disk_info +- name: _add_src_diskinfo_to_cluster_hosts_target/esxifree | cluster_hosts_state + debug: msg={{cluster_hosts_state}} -#- name: _get_diskinfo_esxifree | r__vmware_guest_disk_info -# debug: msg={{r__vmware_guest_disk_info}} - -- assert: { that: "r__vmware_guest_disk_info | json_query(\"results[].guest_disk_info.*[] | [?backing_datastore!='\" + cluster_vars.datastore + \"']\") | length == 0", msg: "Move is only possible if disks are on the same datastore." } +- assert: { that: "cluster_hosts_state | json_query(\"[].disk_info_cloud.*[] | [?backing_datastore!='\" + cluster_vars.datastore + \"']\") | length == 0", msg: "Move is only possible if disks are on the same datastore." } when: _scheme_rmvm_keepdisk_rollback__copy_or_move == "move" -- name: _get_diskinfo_esxifree | augment cluster_hosts_target auto_volumes with source disk info +- name: _add_src_diskinfo_to_cluster_hosts_target/esxifree | augment cluster_hosts_target auto_volumes with source disk info set_fact: cluster_hosts_target: | {%- for cht_host in cluster_hosts_target -%} {%- for cht_autovol in cht_host.auto_volumes -%} - {%- for vmware_guest_disk_info_result in r__vmware_guest_disk_info.results | selectattr('item.tagslabels.lifecycle_state', '!=', 'current')-%} - {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == vmware_guest_disk_info_result.item.name | regex_replace('-(?!.*-).*') -%} - {%- for chs_host_diskinfo in vmware_guest_disk_info_result.guest_disk_info | to_json | from_json | json_query('*| [?unit_number!=`0` && backing_type==\'FlatVer2\' && contains(backing_filename, \'--' + cht_autovol.volname + '.vmdk\')]') -%} + {%- for chs_vm in cluster_hosts_state | selectattr('tagslabels.lifecycle_state', '!=', 'current')-%} + {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == chs_vm.name | regex_replace('-(?!.*-).*') -%} + {%- for chs_host_diskinfo in chs_vm.disk_info_cloud | to_json | from_json | json_query('[?unit_number!=`0` && backing_type==\'FlatVer2\' && contains(backing_filename, \'--' + cht_autovol.volname + '.vmdk\')]') -%} {%- set _ = cht_autovol.update({'volume_size': (chs_host_diskinfo.capacity_in_bytes/1073741824)|int, 'src': {'backing_filename': chs_host_diskinfo.backing_filename, 'copy_or_move': _scheme_rmvm_keepdisk_rollback__copy_or_move }}) -%} {%- endfor -%} {%- endif -%} @@ -33,5 +22,5 @@ {%- endfor -%} {{cluster_hosts_target}} -#- name: _get_diskinfo_esxifree | cluster_hosts_target -# debug: msg={{cluster_hosts_target}} +- name: _add_src_diskinfo_to_cluster_hosts_target/esxifree | cluster_hosts_target + debug: msg={{cluster_hosts_target}} diff --git a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__gcp.yml b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__gcp.yml index 5b278a9f..0176e933 100644 
--- a/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__gcp.yml +++ b/redeploy/_scheme_rmvm_keepdisk_rollback/tasks/_add_src_diskinfo_to_cluster_hosts_target__gcp.yml @@ -1,32 +1,17 @@ --- -- name: _get_diskinfo_gcp | Get existing GCE instance info (per AZ) - gcp_compute_instance_info: - zone: "{{cluster_vars.region}}-{{item}}" - filters: - - "labels.cluster_name = {{cluster_name}}" - project: "{{cluster_vars[buildenv].vpc_project_id}}" - auth_kind: "serviceaccount" - service_account_file: "{{gcp_credentials_file}}" - scopes: ["https://www.googleapis.com/auth/compute.readonly"] - with_items: "{{ cluster_vars[buildenv].hosttype_vars | json_query(\"*[vms_by_az][][keys(@)][][]\") | unique }}" - register: r__gcp_compute_instance_info - -- name: _get_diskinfo_gcp | r__gcp_compute_instance_info.results - debug: msg={{r__gcp_compute_instance_info.results}} - -- name: _get_diskinfo_gcp | augment/update cluster_hosts_target auto_volumes with source disk info +- name: _add_src_diskinfo_to_cluster_hosts_target/gcp | augment/update cluster_hosts_target auto_volumes with source disk info set_fact: cluster_hosts_target: | {%- for cht_host in cluster_hosts_target -%} {%- for cht_autovol in cht_host.auto_volumes -%} - {%- for gcp_compute_instance_result in r__gcp_compute_instance_info.results | json_query('[].resources[?labels.lifecycle_state != "current"][]') -%} - {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == gcp_compute_instance_result.name | regex_replace('-(?!.*-).*') -%} - {%- for gcp_compute_instance_diskinfo in gcp_compute_instance_result.disks -%} - {%- if cht_autovol.initialize_params.disk_name | regex_replace('(.*)-.*(--.*)', '\\1\\2') == gcp_compute_instance_diskinfo.source | basename | regex_replace('(.*)-.*(--.*)', '\\1\\2') -%} - {%- set _ = cht_autovol.update({'device_name': gcp_compute_instance_diskinfo.source | basename}) -%} - {%- set _ = cht_autovol.update({'src': {'hostname': gcp_compute_instance_result.name, 'device_name': cht_autovol.device_name, 'source_url': gcp_compute_instance_diskinfo.source }}) -%} - {%- set _ = cht_autovol.update({'initialize_params': {'disk_name': cht_autovol.device_name, 'disk_size_gb': gcp_compute_instance_diskinfo.diskSizeGb}}) -%} + {%- for chs_host in cluster_hosts_state | json_query('[?tagslabels.lifecycle_state != "current"]') -%} + {%- if cht_host.hostname | regex_replace('-(?!.*-).*') == chs_host.name | regex_replace('-(?!.*-).*') -%} + {%- for chs_host_diskinfo in chs_host.disk_info_cloud -%} + {%- if cht_autovol.initialize_params.disk_name | regex_replace('(.*)-.*(--.*)', '\\1\\2') == chs_host_diskinfo.source | basename | regex_replace('(.*)-.*(--.*)', '\\1\\2') -%} + {%- set _ = cht_autovol.update({'device_name': chs_host_diskinfo.source | basename}) -%} + {%- set _ = cht_autovol.update({'src': {'hostname': chs_host.name, 'device_name': cht_autovol.device_name, 'source_url': chs_host_diskinfo.source }}) -%} + {%- set _ = cht_autovol.update({'initialize_params': {'disk_name': cht_autovol.device_name, 'disk_size_gb': chs_host_diskinfo.diskSizeGb}}) -%} {%- endif -%} {%- endfor -%} {%- endif -%} @@ -35,5 +20,5 @@ {%- endfor -%} {{cluster_hosts_target}} -- name: _get_diskinfo_gcp | cluster_hosts_target +- name: _add_src_diskinfo_to_cluster_hosts_target/gcp | cluster_hosts_target debug: msg={{cluster_hosts_target}} From 9e37293b62b82bde4869210c99be275cef0f955c Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Tue, 6 Apr 2021 23:15:06 +0100 Subject: [PATCH 55/58] update esxifree_guest.py; add esxifree 
examples --- EXAMPLE/README.md | 5 ++ .../cluster_vars__buildenv.yml | 0 .../esxifree/cluster_vars__cloud.yml | 9 +++ .../testid/cluster_vars__clusterid.yml | 25 +++++++ .../testid/homelab/cluster_vars__region.yml | 13 ++++ .../sandbox/cluster_vars__buildenv.yml | 37 ++++++++++ README.md | 7 +- _dependencies/library/esxifree_guest.py | 73 ++++++++++--------- clean/tasks/esxifree.yml | 2 +- 9 files changed, 133 insertions(+), 38 deletions(-) rename EXAMPLE/cluster_defs/azure/testid/westeurope/{mgmt => sandbox}/cluster_vars__buildenv.yml (100%) create mode 100644 EXAMPLE/cluster_defs/esxifree/cluster_vars__cloud.yml create mode 100644 EXAMPLE/cluster_defs/esxifree/testid/cluster_vars__clusterid.yml create mode 100644 EXAMPLE/cluster_defs/esxifree/testid/homelab/cluster_vars__region.yml create mode 100644 EXAMPLE/cluster_defs/esxifree/testid/homelab/sandbox/cluster_vars__buildenv.yml diff --git a/EXAMPLE/README.md b/EXAMPLE/README.md index 25776033..b7fb7276 100644 --- a/EXAMPLE/README.md +++ b/EXAMPLE/README.md @@ -29,6 +29,11 @@ ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=testid -e cloud_ty ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=test_gcp_euw1 --vault-id=sandbox@.vaultpass-client.py ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=test_gcp_euw1 --vault-id=sandbox@.vaultpass-client.py --tags=clusterverse_clean -e clean=_all_ ``` +### ESXi (free): +``` +ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=testid -e cloud_type=esxifree -e region=homelab --vault-id=sandbox@.vaultpass-client.py +ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=testid -e cloud_type=esxifree -e region=homelab --vault-id=sandbox@.vaultpass-client.py --tags=clusterverse_clean -e clean=_all_ +``` ### Azure: ``` ansible-playbook cluster.yml -e buildenv=sandbox -e clusterid=testid -e cloud_type=azure -e region=westeurope --vault-id=sandbox@.vaultpass-client.py diff --git a/EXAMPLE/cluster_defs/azure/testid/westeurope/mgmt/cluster_vars__buildenv.yml b/EXAMPLE/cluster_defs/azure/testid/westeurope/sandbox/cluster_vars__buildenv.yml similarity index 100% rename from EXAMPLE/cluster_defs/azure/testid/westeurope/mgmt/cluster_vars__buildenv.yml rename to EXAMPLE/cluster_defs/azure/testid/westeurope/sandbox/cluster_vars__buildenv.yml diff --git a/EXAMPLE/cluster_defs/esxifree/cluster_vars__cloud.yml b/EXAMPLE/cluster_defs/esxifree/cluster_vars__cloud.yml new file mode 100644 index 00000000..da086063 --- /dev/null +++ b/EXAMPLE/cluster_defs/esxifree/cluster_vars__cloud.yml @@ -0,0 +1,9 @@ +--- + +_scheme_rmvm_keepdisk_rollback__copy_or_move: "move" + +cluster_vars: + dns_cloud_internal_domain: "" # The cloud-internal zone as defined by the cloud provider (e.g. GCP, AWS) + dns_server: "" # Specify DNS server. nsupdate, route53 or clouddns. If empty string is specified, no DNS will be added. + inventory_ip: "private" # 'public' or 'private', (private in case we're operating in a private LAN). 
If public, 'assign_public_ip' must be 'yes' + hardware_version: "19" diff --git a/EXAMPLE/cluster_defs/esxifree/testid/cluster_vars__clusterid.yml b/EXAMPLE/cluster_defs/esxifree/testid/cluster_vars__clusterid.yml new file mode 100644 index 00000000..79499d24 --- /dev/null +++ b/EXAMPLE/cluster_defs/esxifree/testid/cluster_vars__clusterid.yml @@ -0,0 +1,25 @@ +--- + +prometheus_node_exporter_install: false +filebeat_install: false +metricbeat_install: false + +beats_config: + filebeat: +# output_logstash_hosts: ["localhost:5044"] # The destination hosts for filebeat-gathered logs +# extra_logs_paths: # The array is optional, if you need to add more paths or files to scrape for logs +# - /var/log/myapp/*.log + metricbeat: +# output_logstash_hosts: ["localhost:5044"] # The destination hosts for metricbeat-gathered metrics +# diskio: # Diskio retrieves metrics for all disks partitions by default. When diskio.include_devices is defined, only look for defined partitions +# include_devices: ["sda", "sdb", "nvme0n1", "nvme1n1", "nvme2n1"] + + +cluster_vars: + dns_nameserver_zone: &dns_nameserver_zone "" # The zone that dns_server will operate on. gcloud dns needs a trailing '.'. Leave blank if no external DNS (use IPs only) + dns_user_domain: "{%- if _dns_nameserver_zone -%}{{cloud_type}}-{{region}}.{{app_class}}.{{buildenv}}.{{_dns_nameserver_zone}}{%- endif -%}" # A user-defined _domain_ part of the FDQN, (if more prefixes are required before the dns_nameserver_zone) + custom_tagslabels: + inv_resident_id: "myresident" + inv_proposition_id: "myproposition" + inv_cost_centre: "0000000000" +_dns_nameserver_zone: *dns_nameserver_zone diff --git a/EXAMPLE/cluster_defs/esxifree/testid/homelab/cluster_vars__region.yml b/EXAMPLE/cluster_defs/esxifree/testid/homelab/cluster_vars__region.yml new file mode 100644 index 00000000..7e71aae9 --- /dev/null +++ b/EXAMPLE/cluster_defs/esxifree/testid/homelab/cluster_vars__region.yml @@ -0,0 +1,13 @@ +--- + +_ubuntu2004image: "gold-ubuntu2004l-20210324080648" +_centos7image: "gold-ubuntu2004l-20210324080648" + +cluster_vars: + image: "{{_ubuntu2004image}}" + esxi_ip: "192.168.1.3" + username: "svc" + password: !vault | + $ANSIBLE_VAULT;1.1;AES256 + 7669080460651349243347331538721104778691266429457726036813912140404310 + datastore: "4tb-evo860-ssd" diff --git a/EXAMPLE/cluster_defs/esxifree/testid/homelab/sandbox/cluster_vars__buildenv.yml b/EXAMPLE/cluster_defs/esxifree/testid/homelab/sandbox/cluster_vars__buildenv.yml new file mode 100644 index 00000000..169da345 --- /dev/null +++ b/EXAMPLE/cluster_defs/esxifree/testid/homelab/sandbox/cluster_vars__buildenv.yml @@ -0,0 +1,37 @@ +--- + +cluster_vars: + sandbox: + ssh_connection_cfg: + host: &host_ssh_connection_cfg + ansible_user: "ansible" + ansible_ssh_private_key_file: !vault | + $ANSIBLE_VAULT;1.2;AES256;sandbox + 7669080460651349243347331538721104778691266429457726036813912140404310 +# bastion: +# ssh_args: '-o ProxyCommand="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i ./id_rsa_bastion -W %h:%p -q user@192.168.0.1"' +# ssh_priv_key: !vault | +# $ANSIBLE_VAULT;1.2;AES256;sandbox +# 7669080460651349243347331538721104778691266429457726036813912140404310 + networks: + - networkName: "VM Network" + virtualDev: vmxnet3 + cloudinit_netplan: { ethernets: { eth0: { dhcp4: true } } } +# nsupdate_cfg: {server: "", key_name: "", key_secret: ""} # If you're using bind9 (or other nsupdate-compatible 'dns_server') + + hosttype_vars: + sys: + auto_volumes: [ ] + flavor: { num_cpus: "2", memory_mb: 
"2048" } + version: "{{sys_version | default('')}}" + vms_by_az: { a: 1, b: 1, c: 0 } + + sysdisks2: + auto_volumes: + - { mountpoint: "/media/mysvc1", volume_size: 1, provisioning_type: "thin", fstype: "ext4" } + - { mountpoint: "/media/mysvc2", volume_size: 1, provisioning_type: "thin", fstype: "ext4" } + flavor: { num_cpus: "2", memory_mb: "2048" } + version: "{{sys_version | default('')}}" + vms_by_az: { a: 1, b: 1, c: 0 } + +_host_ssh_connection_cfg: { <<: *host_ssh_connection_cfg } diff --git a/README.md b/README.md index 931215f9..322b2596 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # clusterverse   [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) ![PRs Welcome](https://img.shields.io/badge/PRs-Welcome-brightgreen.svg) A full-lifecycle, immutable cloud infrastructure cluster management **role**, using Ansible. -+ **Multi-cloud:** clusterverse can manage cluster lifecycle in AWS, GCP and Azure ++ **Multi-cloud:** clusterverse can manage cluster lifecycle in AWS, GCP, Free ESXi (standalone host only, not vCentre) and Azure + **Deploy:** You define your infrastructure as code (in Ansible yaml), and clusterverse will deploy it + **Scale-up:** If you change the cluster definitions and rerun the deploy, new nodes will be added. + **Redeploy (e.g. up-version):** If you need to up-version, or replace the underlying OS, (i.e. to achieve fully immutable, zero-patching redeploys), the `redeploy.yml` playbook will replace each node in the cluster (via various redeploy schemes), and rollback if any failures occur. @@ -38,6 +38,11 @@ To active the pipenv: + During execution, the json file will be copied locally because the Ansible GCP modules often require the file as input. + Google Cloud SDK needs to be installed to run gcloud command-line (e.g. to disable delete protection) - this is handled by `pipenv install` +### ESXi (free) ++ Username & password for a privileged user on an ESXi host ++ SSH must be enabled on the host ++ Set the `Config.HostAgent.vmacore.soap.maxSessionCount` variable to 0 to allow many concurrent tests to run. + ### Azure + Create an Azure account. + Create a Tenant and a Subscription diff --git a/_dependencies/library/esxifree_guest.py b/_dependencies/library/esxifree_guest.py index e8315dbc..12afb606 100644 --- a/_dependencies/library/esxifree_guest.py +++ b/_dependencies/library/esxifree_guest.py @@ -59,7 +59,7 @@ then the specified virtual machine is powered off.' - 'If C(state) is set to C(shutdownguest) and virtual machine exists, then the virtual machine is shutdown.' - 'If C(state) is set to C(rebootguest) and virtual machine exists, then the virtual machine is rebooted.' 
- choices: [ present, absent, poweredon, poweredoff, shutdownguest, rebootguest ] + choices: [ present, absent, poweredon, poweredoff, shutdownguest, rebootguest, unchanged ] default: present name: description: @@ -526,12 +526,12 @@ def __init__(self, hostname, username='root', password=None, name=None, moid=Non self.name = name def get_vm(self, name=None, moid=None): - (stdin, stdout, stderr) = self.esxiCnx.exec_command("vim-cmd vmsvc/getallvms") - allVms = stdout.readlines() + response, cookies = self.soap_client.send_req('<_this type="PropertyCollector">ha-property-collectorVirtualMachinefalsenameha-folder-vmtraverseChildFolderchildEntity traverseChildDatacentervmFoldertraverseChild ') + xmltodictresponse = xmltodict.parse(response.read()) + allVms = [{'moid': a['obj']['#text'], 'name': a['propSet']['val']['#text']} for a in xmltodictresponse['soapenv:Envelope']['soapenv:Body']['RetrievePropertiesExResponse']['returnval']['objects']] for vm in allVms: - vm_params = re.search('^(?P\d+)\s+(?P.*?)\s+(?P\[.*?\])\s+(?P.*?)\s+(?P.*?)\s+(?P.*?)(:\s+(?P.*))?$', vm) - if vm_params and vm_params.group('vmname') and vm_params.group('vmid') and ((name and name == vm_params.group('vmname')) or (moid and moid == vm_params.group('vmid'))): - return vm_params.group('vmname'), vm_params.group('vmid') + if ((name and name == vm['name']) or (moid and moid == vm['moid'])): + return vm['name'], vm['moid'] return None, None def get_vmx(self, moid): @@ -902,45 +902,46 @@ class cDummyAnsibleModule(): # "wait_timeout": 180, # } - ## Clone VM + # ## Clone VM + # params = { + # "annotation": None, + # # "annotation": "{'lifecycle_state': 'current', 'Name': 'test-prod-sys-a0-1589979249', 'cluster_suffix': '1589979249', 'hosttype': 'sys', 'cluster_name': 'test-prod', 'env': 'prod', 'owner': 'dougal'}", + # "cdrom": {"type": "client"}, + # "cloudinit_userdata": [], + # "customvalues": [], + # "datastore": "4tb-evo860-ssd", + # "disks": [], + # # "disks": [{"size_gb": 1, "type": "thin", "volname": "test"}], + # # "disks": [{"size_gb": 1, "type": "thin", "volname": "test", "src": {"backing_filename": "[4tb-evo860-ssd] testdisks-dev-sys-a0-1601204786/testdisks-dev-sys-a0-1601204786--test.vmdk", "copy_or_move": "move"}}], + # "force": False, + # "guest_id": "ubuntu-64", + # "hardware": {"memory_mb": "2048", "num_cpus": "2", "version": "15"}, + # "hostname": "192.168.1.3", + # "moid": None, + # "name": "dougal-test-dev-sys-a0-new", + # "networks": [{"cloudinit_netplan": {"ethernets": {"eth0": {"dhcp4": True}}}, "networkName": "VM Network", "virtualDev": "vmxnet3"}], + # "password": sys.argv[2], + # "state": "present", + # "template": "dougal-test-dev-sys-a0-1617553110", + # "username": "svc", + # "wait": True, + # "wait_timeout": 180 + # } + + # ## Update VM params = { + # "annotation": "{'Name': 'dougal-test-dev-sysdisks2-a0-1617548508', 'hosttype': 'sysdisks2', 'env': 'dev', 'cluster_name': 'dougal-test-dev', 'owner': 'dougal', 'cluster_suffix': '1617548508', 'lifecycle_state': 'retiring', 'maintenance_mode': 'false'}", "annotation": None, - # "annotation": "{'lifecycle_state': 'current', 'Name': 'test-prod-sys-a0-1589979249', 'cluster_suffix': '1589979249', 'hosttype': 'sys', 'cluster_name': 'test-prod', 'env': 'prod', 'owner': 'dougal'}", - "cdrom": {"type": "client"}, - "cloudinit_userdata": [], - "customvalues": [], - "datastore": "4tb-evo860-ssd", - "disks": [], - # "disks": [{"size_gb": 1, "type": "thin", "volname": "test"}], - # "disks": [{"size_gb": 1, "type": "thin", "volname": "test", "src": 
{"backing_filename": "[4tb-evo860-ssd] testdisks-dev-sys-a0-1601204786/testdisks-dev-sys-a0-1601204786--test.vmdk", "copy_or_move": "move"}}], - "force": False, - "guest_id": "ubuntu-64", - "hardware": {"memory_mb": "2048", "num_cpus": "2", "version": "15"}, + "disks": None, "hostname": "192.168.1.3", + "name": "cvtest-16-dd9032f65aef7-dev-sys-b0-1617726990", "moid": None, - "name": "dougal-test-dev-sys-a0-new", - "networks": [{"cloudinit_netplan": {"ethernets": {"eth0": {"dhcp4": True}}}, "networkName": "VM Network", "virtualDev": "vmxnet3"}], "password": sys.argv[2], - "state": "present", - "template": "dougal-test-dev-sys-a0-1617553110", + "state": "unchanged", "username": "svc", - "wait": True, "wait_timeout": 180 } - # ## Poweroff VM - # params = { - # # "annotation": "{'Name': 'dougal-test-dev-sysdisks2-a0-1617548508', 'hosttype': 'sysdisks2', 'env': 'dev', 'cluster_name': 'dougal-test-dev', 'owner': 'dougal', 'cluster_suffix': '1617548508', 'lifecycle_state': 'retiring', 'maintenance_mode': 'false'}", - # "disks": None, - # "hostname": "192.168.1.3", - # "name": "dougal-test-dev-sysdisks2-a0-1617548508", - # "moid": None, - # "password": sys.argv[2], - # "state": "poweredoff", - # "username": "svc", - # "wait_timeout": 180 - # } - ## Delete VM # params = { # "hostname": "192.168.1.3", diff --git a/clean/tasks/esxifree.yml b/clean/tasks/esxifree.yml index ea542172..18ef67bd 100644 --- a/clean/tasks/esxifree.yml +++ b/clean/tasks/esxifree.yml @@ -9,9 +9,9 @@ password: "{{ cluster_vars.password }}" name: "{{item.name}}" state: absent + with_items: "{{hosts_to_clean}}" register: esxi_instances run_once: true - with_items: "{{hosts_to_clean}}" async: 7200 poll: 0 From 2c5e51d254300e3424d3a817b3fe258134e02517 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Wed, 7 Apr 2021 07:24:03 +0100 Subject: [PATCH 56/58] Update esxifree_guest_info.py --- _dependencies/library/esxifree_guest_info.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/_dependencies/library/esxifree_guest_info.py b/_dependencies/library/esxifree_guest_info.py index 7f4e0c4f..46a28a3d 100644 --- a/_dependencies/library/esxifree_guest_info.py +++ b/_dependencies/library/esxifree_guest_info.py @@ -155,10 +155,8 @@ def get_all_vm_info(self): def _getObjSafe(self, inDict, *keys): for key in keys: - try: - inDict = inDict[key] - except KeyError: - return None + try: inDict = inDict[key] + except KeyError: return None return inDict def parse_vm(self, vmObj): @@ -171,7 +169,7 @@ def parse_vm(self, vmObj): newObj.update({"advanced_settings": {advObj['key']: advObj['value'].get('#text') for advObj in configObj['extraConfig']}}) newObj.update({"annotation": configObj['annotation']}) newObj.update({"consolidationNeeded": runtimeObj['consolidationNeeded']}) - newObj.update({"guest_tools_status": guestObj['toolsRunningStatus']}) + newObj.update({"guest_tools_status": guestObj['toolsRunningStatus'] if 'toolsRunningStatus' in guestObj else None}) newObj.update({"guest_tools_version": guestObj['toolsVersion'] if 'toolsVersion' in guestObj else None}) newObj.update({"hw_cores_per_socket": configObj['hardware']['numCoresPerSocket']}) newObj.update({"hw_datastores": [configObj['datastoreUrl']['name']]}) From 41161c47c485bb85fca4ff964d8fa150d79454da Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Wed, 7 Apr 2021 18:11:26 +0100 Subject: [PATCH 57/58] Update esxifree_guest libraries to add retries to soap requests --- _dependencies/library/esxifree_guest.py | 187 ++++++++++--------- 
_dependencies/library/esxifree_guest_info.py | 58 +++--- 2 files changed, 133 insertions(+), 112 deletions(-) diff --git a/_dependencies/library/esxifree_guest.py b/_dependencies/library/esxifree_guest.py index 12afb606..8a7e736f 100644 --- a/_dependencies/library/esxifree_guest.py +++ b/_dependencies/library/esxifree_guest.py @@ -16,8 +16,8 @@ module: esxifree_guest short_description: Manages virtual machines in ESXi without a dependency on the vSphere/ vCenter API. description: > - This module can be used to create new virtual machines from templates or other virtual machines, - manage power state of virtual machine such as power on, power off, suspend, shutdown, reboot, restart etc., + This module can be used to create new virtual machines from scratch or from templates or other virtual machines (i.e. clone them), + delete, or manage the power state of virtual machine such as power on, power off, suspend, shutdown, reboot, restart etc., version_added: '2.7' author: - Dougal Seeley (ansible@dougalseeley.com) @@ -59,6 +59,7 @@ then the specified virtual machine is powered off.' - 'If C(state) is set to C(shutdownguest) and virtual machine exists, then the virtual machine is shutdown.' - 'If C(state) is set to C(rebootguest) and virtual machine exists, then the virtual machine is rebooted.' + - 'If C(state) is set to C(unchanged) the state of the VM will not change (if it's on/off, it will stay so). Used for updating annotations.' choices: [ present, absent, poweredon, poweredoff, shutdownguest, rebootguest, unchanged ] default: present name: @@ -374,8 +375,15 @@ try: from ansible.module_utils.basic import AnsibleModule except: - pass - + # For testing without Ansible (e.g on Windows) + class cDummyAnsibleModule(): + def __init__(self): + self.params={} + def exit_json(self, changed, **kwargs): + print(changed, json.dumps(kwargs, sort_keys=True, indent=4, separators=(',', ': '))) + def fail_json(self, msg): + print("Failed: " + msg) + exit(1) # Executes soap requests on the remote host. 
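# Each request in send_req() below is attempted up to three times (sleeping 1s between attempts)
# to ride out transient SOAP/connection errors from the ESXi host.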
class vmw_soap_client(object): @@ -397,10 +405,20 @@ def send_req(self, envelope_body=None): headers={"Content-Type": "text/xml", "SOAPAction": "urn:vim25/6.7.3", "Accept": "*/*", "Cookie": "vmware_client=VMware; vmware_soap_session=" + str(self.vmware_soap_session_cookie)}) opener = build_opener(HTTPSHandler(context=ssl._create_unverified_context()), HTTPCookieProcessor(cj)) - try: - response = opener.open(req, timeout=30) - except HTTPError as err: - response = str(err) + num_send_attempts = 3 + for send_attempt in range(num_send_attempts): + try: + response = opener.open(req, timeout=30) + except HTTPError as err: + response = str(err) + except: + if send_attempt < num_send_attempts - 1: + time.sleep(1) + continue + else: + raise + break + cookies = {i.name: i for i in list(cj)} return (response[0] if isinstance(response, list) else response, cookies) # If the cookiejar contained anything, we get a list of two responses @@ -878,88 +896,79 @@ def main(): module = AnsibleModule(argument_spec=argument_spec, supports_check_mode=True, required_one_of=[['name', 'moid']]) else: # For testing without Ansible (e.g on Windows) - class cDummyAnsibleModule(): - ## Create blank VM - # params = { - # "hostname": "192.168.1.3", - # "username": "svc", - # "password": None, - # "name": "test-asdf", - # "moid": None, - # "template": None, - # "state": "present", - # "force": False, - # "datastore": "4tb-evo860-ssd", - # "annotation": "{'Name': 'test-asdf'}", - # "guest_id": "ubuntu-64", - # "hardware": {"version": "15", "num_cpus": "2", "memory_mb": "2048"}, - # "cloudinit_userdata": [], - # "disks": [{"boot": True, "size_gb": 16, "type": "thin"}, {"size_gb": 5, "type": "thin"}, {"size_gb": 2, "type": "thin"}], - # "cdrom": {"type": "iso", "iso_path": "/vmfs/volumes/4tb-evo860-ssd/ISOs/ubuntu-18.04.2-server-amd64.iso"}, - # "networks": [{"networkName": "VM Network", "virtualDev": "vmxnet3"}], - # "customvalues": [], - # "wait": True, - # "wait_timeout": 180, - # } - - # ## Clone VM - # params = { - # "annotation": None, - # # "annotation": "{'lifecycle_state': 'current', 'Name': 'test-prod-sys-a0-1589979249', 'cluster_suffix': '1589979249', 'hosttype': 'sys', 'cluster_name': 'test-prod', 'env': 'prod', 'owner': 'dougal'}", - # "cdrom": {"type": "client"}, - # "cloudinit_userdata": [], - # "customvalues": [], - # "datastore": "4tb-evo860-ssd", - # "disks": [], - # # "disks": [{"size_gb": 1, "type": "thin", "volname": "test"}], - # # "disks": [{"size_gb": 1, "type": "thin", "volname": "test", "src": {"backing_filename": "[4tb-evo860-ssd] testdisks-dev-sys-a0-1601204786/testdisks-dev-sys-a0-1601204786--test.vmdk", "copy_or_move": "move"}}], - # "force": False, - # "guest_id": "ubuntu-64", - # "hardware": {"memory_mb": "2048", "num_cpus": "2", "version": "15"}, - # "hostname": "192.168.1.3", - # "moid": None, - # "name": "dougal-test-dev-sys-a0-new", - # "networks": [{"cloudinit_netplan": {"ethernets": {"eth0": {"dhcp4": True}}}, "networkName": "VM Network", "virtualDev": "vmxnet3"}], - # "password": sys.argv[2], - # "state": "present", - # "template": "dougal-test-dev-sys-a0-1617553110", - # "username": "svc", - # "wait": True, - # "wait_timeout": 180 - # } - - # ## Update VM - params = { - # "annotation": "{'Name': 'dougal-test-dev-sysdisks2-a0-1617548508', 'hosttype': 'sysdisks2', 'env': 'dev', 'cluster_name': 'dougal-test-dev', 'owner': 'dougal', 'cluster_suffix': '1617548508', 'lifecycle_state': 'retiring', 'maintenance_mode': 'false'}", - "annotation": None, - "disks": None, - "hostname": 
"192.168.1.3", - "name": "cvtest-16-dd9032f65aef7-dev-sys-b0-1617726990", - "moid": None, - "password": sys.argv[2], - "state": "unchanged", - "username": "svc", - "wait_timeout": 180 - } - - ## Delete VM - # params = { - # "hostname": "192.168.1.3", - # "username": "svc", - # "password": None, - # "name": "test-asdf", - # "moid": None, - # "state": "absent" - # } - - def exit_json(self, changed, **kwargs): - print(changed, json.dumps(kwargs, sort_keys=True, indent=4, separators=(',', ': '))) - - def fail_json(self, msg): - print("Failed: " + msg) - exit(1) - module = cDummyAnsibleModule() + ## Update VM + module.params = { + "hostname": "192.168.1.3", + "username": "svc", + "password": sys.argv[2], + # "annotation": "{'Name': 'dougal-test-dev-sysdisks2-a0-1617548508', 'hosttype': 'sysdisks2', 'env': 'dev', 'cluster_name': 'dougal-test-dev', 'owner': 'dougal', 'cluster_suffix': '1617548508', 'lifecycle_state': 'retiring', 'maintenance_mode': 'false'}", + "annotation": None, + "disks": None, + "name": "cvtest-16-dd9032f65aef7-dev-sys-b0-1617726990", + "moid": None, + "state": "unchanged", + "wait_timeout": 180 + } + + # ## Delete VM + # module.params = { + # "hostname": "192.168.1.3", + # "username": "svc", + # "password": sys.argv[2], + # "name": "test-asdf", + # "moid": None, + # "state": "absent" + # } + # + # ## Clone VM + # module.params = { + # "hostname": "192.168.1.3", + # "username": "svc", + # "password": sys.argv[2], + # "annotation": None, + # # "annotation": "{'lifecycle_state': 'current', 'Name': 'test-prod-sys-a0-1589979249', 'cluster_suffix': '1589979249', 'hosttype': 'sys', 'cluster_name': 'test-prod', 'env': 'prod', 'owner': 'dougal'}", + # "cdrom": {"type": "client"}, + # "cloudinit_userdata": [], + # "customvalues": [], + # "datastore": "4tb-evo860-ssd", + # "disks": [], + # # "disks": [{"size_gb": 1, "type": "thin", "volname": "test"}], + # # "disks": [{"size_gb": 1, "type": "thin", "volname": "test", "src": {"backing_filename": "[4tb-evo860-ssd] testdisks-dev-sys-a0-1601204786/testdisks-dev-sys-a0-1601204786--test.vmdk", "copy_or_move": "move"}}], + # "force": False, + # "guest_id": "ubuntu-64", + # "hardware": {"memory_mb": "2048", "num_cpus": "2", "version": "15"}, + # "moid": None, + # "name": "dougal-test-dev-sys-a0-new", + # "networks": [{"cloudinit_netplan": {"ethernets": {"eth0": {"dhcp4": True}}}, "networkName": "VM Network", "virtualDev": "vmxnet3"}], + # "state": "present", + # "template": "dougal-test-dev-sys-a0-1617553110", + # "wait": True, + # "wait_timeout": 180 + # } + # + # ## Create blank VM + # module.params = { + # "hostname": "192.168.1.3", + # "username": "svc", + # "password": sys.argv[2], + # "name": "test-asdf", + # "annotation": "{'Name': 'test-asdf'}", + # "datastore": "4tb-evo860-ssd", + # "force": False, + # "moid": None, + # "template": None, + # "state": "present", + # "guest_id": "ubuntu-64", + # "hardware": {"version": "15", "num_cpus": "2", "memory_mb": "2048"}, + # "cloudinit_userdata": [], + # "disks": [{"boot": True, "size_gb": 16, "type": "thin"}, {"size_gb": 5, "type": "thin"}, {"size_gb": 2, "type": "thin"}], + # "cdrom": {"type": "iso", "iso_path": "/vmfs/volumes/4tb-evo860-ssd/ISOs/ubuntu-18.04.2-server-amd64.iso"}, + # "networks": [{"networkName": "VM Network", "virtualDev": "vmxnet3"}], + # "customvalues": [], + # "wait": True, + # "wait_timeout": 180, + # } iScraper = esxiFreeScraper(hostname=module.params['hostname'], username=module.params['username'], diff --git a/_dependencies/library/esxifree_guest_info.py 
b/_dependencies/library/esxifree_guest_info.py index 46a28a3d..e7da599b 100644 --- a/_dependencies/library/esxifree_guest_info.py +++ b/_dependencies/library/esxifree_guest_info.py @@ -44,13 +44,13 @@ type: str name: description: - - Name of the virtual machine to work with (optional). + - Name of the virtual machine to retrieve (optional). - Virtual machine names in ESXi are unique - This parameter is case sensitive. type: str moid: description: - - Managed Object ID of the virtual machine to manage + - Managed Object ID of the virtual machine (optional). type: str ''' EXAMPLES = r''' @@ -81,6 +81,7 @@ import json import re import sys +import time import xmltodict # For the soap client @@ -96,10 +97,19 @@ from httplib import HTTPResponse import ssl + try: from ansible.module_utils.basic import AnsibleModule except: - pass + # For testing without Ansible (e.g on Windows) + class cDummyAnsibleModule(): + def __init__(self): + self.params={} + def exit_json(self, changed, **kwargs): + print(changed, json.dumps(kwargs, sort_keys=True, indent=4, separators=(',', ': '))) + def fail_json(self, msg): + print("Failed: " + msg) + exit(1) # Executes soap requests on the remote host. @@ -122,10 +132,20 @@ def send_req(self, envelope_body=None): headers={"Content-Type": "text/xml", "SOAPAction": "urn:vim25/6.7.3", "Accept": "*/*", "Cookie": "vmware_client=VMware; vmware_soap_session=" + str(self.vmware_soap_session_cookie)}) opener = build_opener(HTTPSHandler(context=ssl._create_unverified_context()), HTTPCookieProcessor(cj)) - try: - response = opener.open(req, timeout=30) - except HTTPError as err: - response = str(err) + num_send_attempts = 3 + for send_attempt in range(num_send_attempts): + try: + response = opener.open(req, timeout=30) + except HTTPError as err: + response = str(err) + except: + if send_attempt < num_send_attempts - 1: + time.sleep(1) + continue + else: + raise + break + cookies = {i.name: i for i in list(cj)} return (response[0] if isinstance(response, list) else response, cookies) # If the cookiejar contained anything, we get a list of two responses @@ -226,23 +246,15 @@ def main(): module = AnsibleModule(argument_spec=argument_spec, supports_check_mode=True) else: # For testing without Ansible (e.g on Windows) - class cDummyAnsibleModule(): - params = { - "hostname": "192.168.1.3", - "username": "svc", - "password": sys.argv[2], - "name": None, # "parsnip-prod-sys-a0-1616868999", - "moid": None # 350 - } - - def exit_json(self, changed, **kwargs): - print(changed, json.dumps(kwargs, sort_keys=True, indent=2, separators=(',', ': '))) - - def fail_json(self, msg): - print("Failed: " + msg) - exit(1) - module = cDummyAnsibleModule() + ## Update VM + module.params = { + "hostname": "192.168.1.3", + "username": "svc", + "password": sys.argv[2], + "name": None, # "parsnip-prod-sys-a0-1616868999", + "moid": None # 350 + } iScraper = esxiFreeScraper(hostname=module.params['hostname'], username=module.params['username'], password=module.params['password']) From 2030f208e0e3461dd39750698de94310b847b056 Mon Sep 17 00:00:00 2001 From: Dougal Seeley Date: Sun, 11 Apr 2021 10:57:51 +0100 Subject: [PATCH 58/58] Update jenkinsfiles/Jenkinsfile_testsuite to support scaling up and down during redeploy --- README.md | 3 +- jenkinsfiles/Jenkinsfile_testsuite | 681 ++++++++++++++++------------- 2 files changed, 377 insertions(+), 307 deletions(-) diff --git a/README.md b/README.md index 322b2596..7dc45124 100644 --- a/README.md +++ b/README.md @@ -204,9 +204,8 @@ The role is designed to run in two 
modes: + It supports pluggable redeployment schemes. The following are provided: + **_scheme_rmvm_rmdisk_only** + This is a very basic rolling redeployment of the cluster. - + Canary **is not** supported. + _Supports redeploying to bigger, but not smaller clusters_ - + **It assumes a resilient deployment (it can tolerate one node being deleted from the cluster). There is no rollback in case of failure.** + + **It assumes a resilient deployment (it can tolerate one node being deleted from the cluster). There is _no rollback_ in case of failure.** + For each node in the cluster: + Run `predeleterole` + Delete/terminate the node (note, this is _irreversible_). diff --git a/jenkinsfiles/Jenkinsfile_testsuite b/jenkinsfiles/Jenkinsfile_testsuite index 6059e25a..d43df330 100644 --- a/jenkinsfiles/Jenkinsfile_testsuite +++ b/jenkinsfiles/Jenkinsfile_testsuite @@ -5,21 +5,20 @@ //This will not be needed if we're running this as a multibranch pipeline SCM job, as these are automatically added to the 'scm' variable, but if we instead just cut & paste this file into a pipeline job, they will be used as fallback def DEFAULT_CLUSTERVERSE_URL = "https://github.com/dseeley/clusterverse" -def DEFAULT_CLUSTERVERSE_BRANCH = "dps_esxi" +def DEFAULT_CLUSTERVERSE_BRANCH = "dps_add_azure" //Set the git branch for clusterverse_ops to either the PR branch (env.CHANGE_BRANCH), or the current SCM branch (env.BRANCH_NAME) def CV_OPS_GIT_BRANCH = DEFAULT_CLUSTERVERSE_BRANCH if (env.CHANGE_BRANCH) { - CV_OPS_GIT_BRANCH = env.CHANGE_BRANCH -} -else if (env.BRANCH_NAME) { - CV_OPS_GIT_BRANCH = env.BRANCH_NAME + CV_OPS_GIT_BRANCH = env.CHANGE_BRANCH +} else if (env.BRANCH_NAME) { + CV_OPS_GIT_BRANCH = env.BRANCH_NAME } //This allows us to copy&paste this entire script into a pipeline job in the GUI for faster development time (don't have to commit/push to Git to test every change). def scmVars = null if (currentBuild.getBuildCauses('hudson.model.Cause$SCMTriggerCause').size() > 0) { - scmVars = checkout scm + scmVars = checkout scm } @@ -29,73 +28,74 @@ if (currentBuild.getBuildCauses('hudson.model.Cause$SCMTriggerCause').size() > 0 // It takes some inspiration from this blog: https://www.jenkins.io/blog/2019/12/02/matrix-building-with-scripted-pipeline/ /**************************************************************************************/ class MatrixBuilder { - private HashMap jenkinsParamsCopy - private HashMap _matrixParams //This cannot be made into a Closure due to CPS (again). 
(https://www.jenkins.io/doc/book/pipeline/cps-method-mismatches/) + private Closure clJenkinsParamsMutate //A closure that mutates the Jenkins params _before_ we calculate the axes. Useful in case some params are not intended to be part of the axes (and should be removed) + private Closure clMatrixAxesFilter //A closure of invalid axis combinations, which allows us to filter out combinations that are incompatible with each other (e.g. testing internet explorer on linux) + private Closure clTaskMap + + // NOTE: No constructor. + // When undeclared, constructors are created automatically, creating the instance variables defined above, (where they correspond to the Map that is passed with the instantiation). You can't do a lot of work in a Jenkins Groovy constructor anyway because of CPS (https://www.jenkins.io/doc/book/pipeline/cps-method-mismatches/) + + public Map getTaskMap() { + HashMap tasks = [failFast: false] + _getMatrixAxes().each() { axis -> + List axisEnvVars = axis.collect { key, val -> "${key}=${val}" } + axisEnvVars.add("BUILD_HASH=" + generateMD5(hashCode() + axisEnvVars.join(','), 12)) //A unique build hash of the classid (hashcode) and the matrix elements + tasks[axisEnvVars.join(', ')] = { this.clTaskMap(axisEnvVars) } + } + return (tasks) + } + + private List _getMatrixAxes() { + this._getMatrixParams() + List allCombinations = this._getAxesCombinations() + return (this.clMatrixAxesFilter ? allCombinations.findAll(this.clMatrixAxesFilter) : allCombinations) } - return (tasks) - } - - private List _getMatrixAxes() { - this._getMatrixParams() - List allCombinations = this._getAxesCombinations() - return (this.clMatrixAxesFilter ? allCombinations.findAll(this.clMatrixAxesFilter) : allCombinations) - } - - private HashMap _getMatrixParams() { - HashMap newMatrixParams = Eval.me(this.jenkinsParamsCopy.inspect()) - newMatrixParams = this.clJenkinsParamsMutate ? this.clJenkinsParamsMutate(newMatrixParams) : newMatrixParams - newMatrixParams = newMatrixParams.each { key, choice -> newMatrixParams.put(key, (choice instanceof String) ? choice.split(',') : choice.toString()) } //newMatrixParams().each { param -> param.value = (param.value instanceof String) ? param.value.split(',') : param.value } //NOTE: Doesn't work: https://www.jenkins.io/doc/book/pipeline/cps-method-mismatches/ - this._matrixParams = newMatrixParams - return (newMatrixParams) - } - - @NonCPS - private List _getAxesCombinations() { - List axes = [] - this._matrixParams.each { axis, values -> - List axisList = [] - values.each { value -> - axisList << [(axis): value] - } - axes << axisList + + private HashMap _getMatrixParams() { + HashMap newMatrixParams = Eval.me(this.jenkinsParams.inspect()) + newMatrixParams = this.clJenkinsParamsMutate ? this.clJenkinsParamsMutate(newMatrixParams) : newMatrixParams + newMatrixParams = newMatrixParams.each { key, choice -> newMatrixParams.put(key, (choice instanceof String) ? choice.split(',') : choice.toString()) } //newMatrixParams().each { param -> param.value = (param.value instanceof String) ? 
param.value.split(',') : param.value } //NOTE: Doesn't work: https://www.jenkins.io/doc/book/pipeline/cps-method-mismatches/ + this._matrixParams = newMatrixParams + return (newMatrixParams) + } + + @NonCPS + private List _getAxesCombinations() { + List axes = [] + this._matrixParams.each { axis, values -> + List axisList = [] + values.each { value -> + axisList << [(axis): value] + } + axes << axisList + } + axes.combinations()*.sum() // calculates the cartesian product } - axes.combinations()*.sum() // calculates the cartesian product - } - static String generateMD5(String s, int len = 31) { - java.security.MessageDigest.getInstance("MD5").digest(s.bytes).encodeHex().toString()[0..len] - } + static String generateMD5(String s, int len = 31) { + java.security.MessageDigest.getInstance("MD5").digest(s.bytes).encodeHex().toString()[0..len] + } } properties([ - //disableConcurrentBuilds(), - //pipelineTriggers([pollSCM(ignorePostCommitHooks: true, scmpoll_spec: '''H/30 8-19 * * 1-5''')]), - parameters([ - extendedChoice(name: 'CLOUD_REGION', type: 'PT_MULTI_SELECT', value: 'esxifree/dougalab,aws/eu-west-1,gcp/europe-west1,azure/westeurope', description: 'Specify which cloud/region(s) to test', visibleItemCount: 5), - choice(name: 'BUILDENV', choices: ['', 'dev'], description: "The environment in which to run the tests"), - string(name: 'CLUSTER_ID', defaultValue: 'testsuite', trim: true), - [name: 'DNS_FORCE_DISABLE', $class: 'ChoiceParameter', choiceType: 'PT_RADIO', description: '', randomName: 'choice-parameter-31196915540455', script: [$class: 'GroovyScript', fallbackScript: [classpath: [], sandbox: true, script: ''], script: [classpath: [], sandbox: true, script: 'return [\'false:selected\',\'true\',\'true,false\']']]], - extendedChoice(name: 'REDEPLOY_SCHEME', type: 'PT_CHECKBOX', value: '_scheme_addallnew_rmdisk_rollback,_scheme_addnewvm_rmdisk_rollback,_scheme_rmvm_rmdisk_only,_scheme_rmvm_keepdisk_rollback', defaultValue: '_scheme_addallnew_rmdisk_rollback,_scheme_addnewvm_rmdisk_rollback,_scheme_rmvm_rmdisk_only,_scheme_rmvm_keepdisk_rollback', description: 'Specify which redeploy scheme(s) to test', visibleItemCount: 5), - choice(name: 'CLEAN_ON_FAILURE', choices: [true, false], description: "Run a clusterverse clean in the event of a failure."), - extendedChoice(name: 'MYHOSTTYPES_TEST', type: 'PT_MULTI_SELECT', value: 'nomyhosttypes,myhosttypes', defaultValue: 'nomyhosttypes', descriptionPropertyValue: 'Without myhosttypes, With myhosttypes', description: 'Whether to run tests on pre-configured hosttypes.', visibleItemCount: 2), - [name: 'MYHOSTTYPES_LIST', $class: 'DynamicReferenceParameter', choiceType: 'ET_FORMATTED_HTML', description: 'These hosttype definitions must exist in cluster_vars for all clusters', randomName: 'choice-parameter-423779762617532', referencedParameters: 'MYHOSTTYPES_TEST', script: [$class: 'GroovyScript', fallbackScript: [classpath: [], sandbox: true, script: 'return ""'], script: [classpath: [], sandbox: true, script: 'if (MYHOSTTYPES_TEST.split(\',\').contains(\'myhosttypes\')) { return ("") }']]], - [name: 'MYHOSTTYPES_SERIAL_PARALLEL', $class: 'CascadeChoiceParameter', choiceType: 'PT_RADIO', description: 'Run the myhosttype test in serial or parallel', randomName: 'choice-parameter-424489601389882', referencedParameters: 'MYHOSTTYPES_TEST', script: [$class: 'GroovyScript', fallbackScript: [classpath: [], sandbox: true, script: 'return([])'], script: [classpath: [], sandbox: true, script: 'if (MYHOSTTYPES_TEST==\'nomyhosttypes,myhosttypes\') { 
return([\'serial:selected\',\'parallel\']) }']]], - extendedChoice(name: 'IMAGE_TESTED', type: 'PT_MULTI_SELECT', value: '_ubuntu2004image,_centos7image', defaultValue: '_ubuntu2004image', descriptionPropertyValue: 'Ubuntu 20.04, CentOS 7', description: 'Specify which image(s) to test', visibleItemCount: 3), - ]) + //disableConcurrentBuilds(), + //pipelineTriggers([pollSCM(ignorePostCommitHooks: true, scmpoll_spec: '''H/30 8-19 * * 1-5''')]), + parameters([ + extendedChoice(name: 'CLOUD_REGION', type: 'PT_MULTI_SELECT', value: 'esxifree/dougalab,aws/eu-west-1,gcp/europe-west1,azure/westeurope', description: 'Specify which cloud/region(s) to test', visibleItemCount: 5), + choice(name: 'BUILDENV', choices: ['', 'dev'], description: "The environment in which to run the tests"), + string(name: 'CLUSTER_ID', defaultValue: 'testsuite', trim: true), + [name: 'DNS_FORCE_DISABLE', $class: 'ChoiceParameter', choiceType: 'PT_RADIO', description: '', randomName: 'choice-parameter-31196915540455', script: [$class: 'GroovyScript', fallbackScript: [classpath: [], sandbox: true, script: ''], script: [classpath: [], sandbox: true, script: 'return [\'false:selected\',\'true\',\'true,false\']']]], + extendedChoice(name: 'REDEPLOY_SCHEME', type: 'PT_CHECKBOX', value: '_scheme_addallnew_rmdisk_rollback,_scheme_addnewvm_rmdisk_rollback,_scheme_rmvm_rmdisk_only,_scheme_rmvm_keepdisk_rollback', defaultValue: '_scheme_addallnew_rmdisk_rollback,_scheme_addnewvm_rmdisk_rollback,_scheme_rmvm_rmdisk_only,_scheme_rmvm_keepdisk_rollback', description: 'Specify which redeploy scheme(s) to test', visibleItemCount: 5), + choice(name: 'CLEAN_ON_FAILURE', choices: [true, false], description: "Run a clusterverse clean in the event of a failure."), + extendedChoice(name: 'MYHOSTTYPES_TEST', type: 'PT_MULTI_SELECT', value: 'nomyhosttypes,myhosttypes', defaultValue: 'nomyhosttypes', descriptionPropertyValue: 'Without myhosttypes, With myhosttypes', description: 'Whether to run tests on pre-configured hosttypes.', visibleItemCount: 3), + [name: 'MYHOSTTYPES_LIST', $class: 'DynamicReferenceParameter', choiceType: 'ET_FORMATTED_HTML', description: 'These hosttype definitions must exist in cluster_vars for all clusters', randomName: 'choice-parameter-423779762617532', referencedParameters: 'MYHOSTTYPES_TEST', script: [$class: 'GroovyScript', fallbackScript: [classpath: [], sandbox: true, script: 'return ""'], script: [classpath: [], sandbox: true, script: 'if (MYHOSTTYPES_TEST.split(\',\').contains(\'myhosttypes\')) { return ("") }']]], + [name: 'MYHOSTTYPES_SERIAL_PARALLEL', $class: 'CascadeChoiceParameter', choiceType: 'PT_RADIO', description: 'Run the myhosttype test in serial or parallel', randomName: 'choice-parameter-424489601389882', referencedParameters: 'MYHOSTTYPES_TEST', script: [$class: 'GroovyScript', fallbackScript: [classpath: [], sandbox: true, script: 'return([])'], script: [classpath: [], sandbox: true, script: 'if (MYHOSTTYPES_TEST==\'nomyhosttypes,myhosttypes\') { return([\'serial:selected\',\'parallel\']) }']]], + extendedChoice(name: 'SCALEUPDOWN', type: 'PT_MULTI_SELECT', value: 'noscale,scaleup,scaledown', defaultValue: 'noscale', description: 'Specify whether to test scaling up and/or down.', visibleItemCount: 3), + extendedChoice(name: 'IMAGE_TESTED', type: 'PT_MULTI_SELECT', value: '_ubuntu2004image,_centos7image', defaultValue: '_ubuntu2004image', descriptionPropertyValue: 'Ubuntu 20.04, CentOS 7', description: 'Specify which image(s) to test', visibleItemCount: 3), + ]) ]) println("User-supplied 
'params': \n" + params.inspect() + "\n") @@ -107,75 +107,78 @@ println("User-supplied 'params': \n" + params.inspect() + "\n") // A class to hold the status of each stage, so we can fail a stage and be able to run the clean at the end if needed class cStageBuild { - public String result = 'SUCCESS' - public HashMap userParams = [:] - - String getUserParamsString() { - String userParamsString = "" - this.userParams.each({paramName, paramVal -> - userParamsString += " -e ${paramName}=${paramVal}" - }) - return(userParamsString + " -vvvv") - } + public String result = 'SUCCESS' + public HashMap userParams = [:] + + String getUserParamsString() { + String userParamsString = "" + this.userParams.each({ paramName, paramVal -> + userParamsString += " -e ${paramName}=${paramVal}" + }) + return (userParamsString + " -vvvv") + } } // A pipeline 'stage' template for clusterverse-ops boilerplace def stage_cvops(String stageLabel, cStageBuild stageBuild, Closure stageExpressions) { - stage(stageLabel) { - if (stageBuild.result == 'SUCCESS') { - try { - stageExpressions() - } catch (Exception err) { - currentBuild.result = 'FAILURE' - stageBuild.result = 'FAILURE' - unstable('Stage failed! Error was: ' + err) // OR: 'error "Stage failure"' or 'throw new org.jenkinsci.plugins.workflow.steps.FlowInterruptedException(hudson.model.Result.FAILURE)', but both of these fail all future stages, preventing us calling the clean. - } + stage(stageLabel) { + if (stageBuild.result == 'SUCCESS') { + try { + stageExpressions() + } catch (Exception err) { + currentBuild.result = 'FAILURE' + stageBuild.result = 'FAILURE' + unstable('Stage failed! Error was: ' + err) // OR: 'error "Stage failure"' or 'throw new org.jenkinsci.plugins.workflow.steps.FlowInterruptedException(hudson.model.Result.FAILURE)', but both of these fail all future stages, preventing us calling the clean. + } + } } - } } /**************************************************************************************/ // A 'self-test' matrix. 
Doesn't actually do anything, just tests the logic of the matrix /**************************************************************************************/ SELFTEST = new MatrixBuilder([ - jenkinsParamsCopy: params, - clJenkinsParamsMutate: { jenkinsParamsCopy -> - jenkinsParamsCopy.remove('MYHOSTTYPES_LIST') - jenkinsParamsCopy.remove('MYHOSTTYPES_TEST') - jenkinsParamsCopy.remove('MYHOSTTYPES_SERIAL_PARALLEL') - jenkinsParamsCopy.remove('CLEAN_ON_FAILURE') - return jenkinsParamsCopy - }, - clMatrixAxesFilter: { axis -> - !(params.DNS_TEST == 'both' && axis['DNS_FORCE_DISABLE'] == 'true' && axis['CLOUD_REGION'] == 'esxifree/dougalab') && - !(axis['IMAGE_TESTED'] != '_ubuntu2004image' && axis['CLOUD_REGION'] == 'esxifree/dougalab') - }, - clTaskMap: { axisEnvVars -> - node { - withEnv(axisEnvVars) { - withCredentials([string(credentialsId: "VAULT_PASSWORD_${env.BUILDENV.toUpperCase()}", variable: 'VAULT_PASSWORD_BUILDENV')]) { - env.VAULT_PASSWORD_BUILDENV = VAULT_PASSWORD_BUILDENV - } - sh 'printenv | sort' - def stageBuild = new cStageBuild([result: 'SUCCESS']) - - stage_cvops('deploy', stageBuild, { - echo "deploy" - }) - - stage_cvops('redeploy (1/4 fail)', stageBuild, { - echo "redeploy" - //Test that script can fail individual stages (1 in 4 should fail) - def x = Math.abs(new Random().nextInt() % 4) + 1 - if (x == 1) throw new IllegalStateException("Test failed stage") - }) - - stage_cvops('deploy on top', stageBuild, { - echo "deploy on top" - }) + jenkinsParams: params, + clJenkinsParamsMutate: { jenkinsParams -> + jenkinsParams.remove('MYHOSTTYPES_LIST') + jenkinsParams.remove('MYHOSTTYPES_TEST') + jenkinsParams.remove('MYHOSTTYPES_SERIAL_PARALLEL') + jenkinsParams.remove('CLEAN_ON_FAILURE') + return jenkinsParams + }, + clMatrixAxesFilter: { axis -> + !(params.DNS_TEST == 'both' && axis['DNS_FORCE_DISABLE'] == 'true' && axis['CLOUD_REGION'] == 'esxifree/dougalab') && /* DNS is not supported in dougalab */ + !(axis['IMAGE_TESTED'] != '_ubuntu2004image' && axis['CLOUD_REGION'] == 'esxifree/dougalab') && /* Only _ubuntu2004image is supported in dougalab */ + !(axis['REDEPLOY_SCHEME'] == '_scheme_rmvm_keepdisk_rollback' && axis['CLOUD_REGION'].startsWith('azure/')) && /* _scheme_rmvm_keepdisk_rollback not supported in Azure */ + !(axis['REDEPLOY_SCHEME'] == '_scheme_rmvm_rmdisk_only' && axis['SCALEUPDOWN'] == 'scaledown') && /* _scheme_rmvm_rmdisk_only only supports scaling up */ + !(axis['REDEPLOY_SCHEME'] == '_scheme_rmvm_keepdisk_rollback' && (axis['SCALEUPDOWN'] == 'scaledown' || axis['SCALEUPDOWN'] == 'scaleup')) /* _scheme_rmvm_keepdisk_rollback does not support scaling */ + }, + clTaskMap: { axisEnvVars -> + node { + withEnv(axisEnvVars) { + withCredentials([string(credentialsId: "VAULT_PASSWORD_${env.BUILDENV.toUpperCase()}", variable: 'VAULT_PASSWORD_BUILDENV')]) { + env.VAULT_PASSWORD_BUILDENV = VAULT_PASSWORD_BUILDENV + } + sh 'printenv | sort' + def stageBuild = new cStageBuild([result: 'SUCCESS']) + + stage_cvops('deploy', stageBuild, { + echo "deploy" + }) + + stage_cvops('redeploy (1/4 fail)', stageBuild, { + echo "redeploy" + //Test that script can fail individual stages (1 in 4 should fail) + def x = Math.abs(new Random().nextInt() % 4) + 1 + if (x == 1) throw new IllegalStateException("Test failed stage") + }) + + stage_cvops('deploy on top', stageBuild, { + echo "deploy on top" + }) + } + } } - } - } ]) @@ -183,79 +186,113 @@ SELFTEST = new MatrixBuilder([ // Runs tests *without* setting myhosttypes. 
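// To illustrate the axis expansion (hypothetical parameter values, not a real run): each comma-separated Jenkins param
// becomes one axis, _getAxesCombinations() takes the cartesian product of all axes, and clMatrixAxesFilter prunes the
// incompatible combinations. For example, with:
//   CLOUD_REGION = 'aws/eu-west-1,esxifree/dougalab' and IMAGE_TESTED = '_ubuntu2004image,_centos7image'
// the raw product is 4 combinations; the filter drops [CLOUD_REGION: 'esxifree/dougalab', IMAGE_TESTED: '_centos7image']
// (only _ubuntu2004image is supported in dougalab), leaving 3 parallel branches, each named and env-injected as e.g.
//   "CLOUD_REGION=aws/eu-west-1, IMAGE_TESTED=_centos7image, BUILD_HASH=<md5-prefix>"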
This is a relatively straightforward application of the matrix algorithm. /**************************************************************************************/ CVTEST_NOMYHOSTTYPES = new MatrixBuilder([ - jenkinsParamsCopy: params, - clJenkinsParamsMutate: { jenkinsParamsCopy -> - jenkinsParamsCopy.remove('MYHOSTTYPES_LIST') - jenkinsParamsCopy.remove('MYHOSTTYPES_TEST') - jenkinsParamsCopy.remove('MYHOSTTYPES_SERIAL_PARALLEL') - jenkinsParamsCopy.remove('CLEAN_ON_FAILURE') - return jenkinsParamsCopy - }, - clMatrixAxesFilter: { axis -> - !(params.DNS_TEST == 'both' && axis['DNS_FORCE_DISABLE'] == 'true' && axis['CLOUD_REGION'] == 'esxifree/dougalab') && - !(axis['IMAGE_TESTED'] != '_ubuntu2004image' && axis['CLOUD_REGION'] == 'esxifree/dougalab') - }, - clTaskMap: { axisEnvVars -> - node { - withEnv(axisEnvVars) { - withCredentials([string(credentialsId: "VAULT_PASSWORD_${env.BUILDENV.toUpperCase()}", variable: 'VAULT_PASSWORD_BUILDENV')]) { - env.VAULT_PASSWORD_BUILDENV = VAULT_PASSWORD_BUILDENV - } - sh 'printenv | sort' - def stageBuild = new cStageBuild([result: 'SUCCESS']) - - if (env.IMAGE_TESTED) { - stageBuild.userParams.put("cluster_vars_override", "\\\'{\\\"image\\\":\\\"{{${env.IMAGE_TESTED}}}\\\"}\\\'") //NOTE: NO SPACES are allowed in this!! - } - - - stageBuild.userParams.put("skip_release_version_check", "true") - stageBuild.userParams.put("release_version", "1_0_0") - stage_cvops('deploy', stageBuild, { - build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: env.DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'deploy'), string(name: 'REDEPLOY_SCHEME', value: ''), string(name: 'CANARY', value: 'none'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: true), string(name: 'MYHOSTTYPES', value: ''), string(name: 'CV_GIT_URL', value: scmVars ? scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.userParamsString())] - }) - - if (env.REDEPLOY_SCHEME) { - stageBuild.userParams.put("release_version", "2_0_0") - stage_cvops('redeploy canary=start', stageBuild, { - build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${env.BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'redeploy'), string(name: 'REDEPLOY_SCHEME', value: (env.REDEPLOY_SCHEME ? env.REDEPLOY_SCHEME : '')), string(name: 'CANARY', value: 'start'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: false), string(name: 'MYHOSTTYPES', value: ''), string(name: 'CV_GIT_URL', value: scmVars ? 
scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.userParamsString())] - }) - - stage_cvops('redeploy canary=finish', stageBuild, { - build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${env.BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'redeploy'), string(name: 'REDEPLOY_SCHEME', value: (env.REDEPLOY_SCHEME ? env.REDEPLOY_SCHEME : '')), string(name: 'CANARY', value: 'finish'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: false), string(name: 'MYHOSTTYPES', value: ''), string(name: 'CV_GIT_URL', value: scmVars ? scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.userParamsString())] - }) - - stage_cvops('redeploy canary=tidy', stageBuild, { - build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${env.BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'redeploy'), string(name: 'REDEPLOY_SCHEME', value: (env.REDEPLOY_SCHEME ? env.REDEPLOY_SCHEME : '')), string(name: 'CANARY', value: 'tidy'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: false), string(name: 'MYHOSTTYPES', value: ''), string(name: 'CV_GIT_URL', value: scmVars ? scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.userParamsString())] - }) - - stageBuild.userParams.put("release_version", "3_0_0") - stage_cvops('redeploy canary=none (tidy_on_success)', stageBuild, { - build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${env.BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: env.DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'redeploy'), string(name: 'REDEPLOY_SCHEME', value: (env.REDEPLOY_SCHEME ? env.REDEPLOY_SCHEME : '')), string(name: 'CANARY', value: 'none'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: true), string(name: 'MYHOSTTYPES', value: ''), string(name: 'CV_GIT_URL', value: scmVars ? 
scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.userParamsString())] - }) - } else { - stage_cvops('Redeploy not requested', stageBuild, { - echo "Redeploy testing not requested" - }) - } - - stage_cvops('deploy on top', stageBuild, { - build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${env.BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: env.DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'deploy'), string(name: 'REDEPLOY_SCHEME', value: ''), string(name: 'CANARY', value: 'none'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: true), string(name: 'MYHOSTTYPES', value: ''), string(name: 'CV_GIT_URL', value: scmVars ? scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.userParamsString())] - }) - - if (stageBuild.result == 'SUCCESS' || params.CLEAN_ON_FAILURE == 'true') { - stage('clean') { - if (stageBuild.result != 'SUCCESS') { - echo "Stage failure: Running clean-up on cluster..." - } - catchError { - build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${env.BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: env.DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'clean'), string(name: 'REDEPLOY_SCHEME', value: ''), string(name: 'CANARY', value: 'none'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: true), string(name: 'MYHOSTTYPES', value: ''), string(name: 'CV_GIT_URL', value: scmVars ? 
scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.userParamsString())] - } + jenkinsParams: params, + clJenkinsParamsMutate: { jenkinsParams -> + jenkinsParams.remove('MYHOSTTYPES_LIST') + jenkinsParams.remove('MYHOSTTYPES_TEST') + jenkinsParams.remove('MYHOSTTYPES_SERIAL_PARALLEL') + jenkinsParams.remove('CLEAN_ON_FAILURE') + return jenkinsParams + }, + clMatrixAxesFilter: { axis -> + !(params.DNS_TEST == 'both' && axis['DNS_FORCE_DISABLE'] == 'true' && axis['CLOUD_REGION'] == 'esxifree/dougalab') && /* DNS is not supported in dougalab */ + !(axis['IMAGE_TESTED'] != '_ubuntu2004image' && axis['CLOUD_REGION'] == 'esxifree/dougalab') && /* Only _ubuntu2004image is supported in dougalab */ + !(axis['REDEPLOY_SCHEME'] == '_scheme_rmvm_keepdisk_rollback' && axis['CLOUD_REGION'].startsWith('azure/')) && /* _scheme_rmvm_keepdisk_rollback not supported in Azure */ + !(axis['REDEPLOY_SCHEME'] == '_scheme_rmvm_rmdisk_only' && axis['SCALEUPDOWN'] == 'scaledown') && /* _scheme_rmvm_rmdisk_only only supports scaling up */ + !(axis['REDEPLOY_SCHEME'] == '_scheme_rmvm_keepdisk_rollback' && (axis['SCALEUPDOWN'] == 'scaledown' || axis['SCALEUPDOWN'] == 'scaleup')) /* _scheme_rmvm_keepdisk_rollback does not support scaling */ + }, + clTaskMap: { axisEnvVars -> + node { + withEnv(axisEnvVars) { + withCredentials([string(credentialsId: "VAULT_PASSWORD_${env.BUILDENV.toUpperCase()}", variable: 'VAULT_PASSWORD_BUILDENV')]) { + env.VAULT_PASSWORD_BUILDENV = VAULT_PASSWORD_BUILDENV + } + sh 'printenv | sort' + def stageBuild = new cStageBuild([result: 'SUCCESS']) + HashMap cluster_vars_override = [:] + + if (env.IMAGE_TESTED) { + cluster_vars_override += [image: "{{${env.IMAGE_TESTED}}}"] + stageBuild.userParams.put("cluster_vars_override", "\\\'" + groovy.json.JsonOutput.toJson(cluster_vars_override).replace("\"", "\\\"") + "\\\'") //NOTE: NO SPACES are allowed in this!! + } + + stageBuild.userParams.put("skip_release_version_check", "true") + stageBuild.userParams.put("release_version", "1_0_0") + stage_cvops('deploy', stageBuild, { + build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: env.DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'deploy'), string(name: 'REDEPLOY_SCHEME', value: ''), string(name: 'CANARY', value: 'none'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: true), string(name: 'MYHOSTTYPES', value: ''), string(name: 'CV_GIT_URL', value: scmVars ? 
scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.getUserParamsString())] + }) + + // Update the clustervars with new scaled cluster size + if (env.SCALEUPDOWN == 'scaleup') { + cluster_vars_override += ["${env.BUILDENV}": [hosttype_vars: [sys: [vms_by_az: [b: 1, c: 1]]]]] // AZ 'c' is not set normally + } else if (env.SCALEUPDOWN == 'scaledown') { + cluster_vars_override += ["${env.BUILDENV}": [hosttype_vars: [sys: [vms_by_az: [b: 0, c: 0]]]]] // AZ 'b' is set normally + } + + if (cluster_vars_override.size()) { + stageBuild.userParams.put("cluster_vars_override", "\\\'" + groovy.json.JsonOutput.toJson(cluster_vars_override).replace("\"", "\\\"") + "\\\'") //NOTE: NO SPACES are allowed in this!! + } + + if (env.REDEPLOY_SCHEME) { + stageBuild.userParams.put("release_version", "2_0_0") + stage_cvops('redeploy canary=start', stageBuild, { + build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${env.BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: env.DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'redeploy'), string(name: 'REDEPLOY_SCHEME', value: (env.REDEPLOY_SCHEME ? env.REDEPLOY_SCHEME : '')), string(name: 'CANARY', value: 'start'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: false), string(name: 'MYHOSTTYPES', value: ''), string(name: 'CV_GIT_URL', value: scmVars ? scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.getUserParamsString())] + }) + + stage_cvops('redeploy canary=finish', stageBuild, { + build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${env.BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: env.DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'redeploy'), string(name: 'REDEPLOY_SCHEME', value: (env.REDEPLOY_SCHEME ? env.REDEPLOY_SCHEME : '')), string(name: 'CANARY', value: 'finish'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: false), string(name: 'MYHOSTTYPES', value: ''), string(name: 'CV_GIT_URL', value: scmVars ? scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.getUserParamsString())] + }) + + stage_cvops('redeploy canary=tidy', stageBuild, { + build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${env.BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: env.DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'redeploy'), string(name: 'REDEPLOY_SCHEME', value: (env.REDEPLOY_SCHEME ? 
env.REDEPLOY_SCHEME : '')), string(name: 'CANARY', value: 'tidy'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: false), string(name: 'MYHOSTTYPES', value: ''), string(name: 'CV_GIT_URL', value: scmVars ? scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.getUserParamsString())] + }) + + //Need to redeploy original cluster (without scaling), to test that scaling works with next redeploy test (canary=none) + if (env.SCALEUPDOWN == 'scaleup' || env.SCALEUPDOWN == 'scaledown') { + cluster_vars_override.remove("${env.BUILDENV}") + stageBuild.userParams.put("cluster_vars_override", "\\\'" + groovy.json.JsonOutput.toJson(cluster_vars_override).replace("\"", "\\\"") + "\\\'") + stageBuild.userParams.put("release_version", "2_5_0") + stage_cvops('deploy clean original (unscaled) for next test', stageBuild, { + build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: env.DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'deploy'), string(name: 'REDEPLOY_SCHEME', value: ''), string(name: 'CANARY', value: 'none'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: true), string(name: 'MYHOSTTYPES', value: ''), string(name: 'CV_GIT_URL', value: scmVars ? scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.getUserParamsString() + " -e clean=_all_")] + }) + + //Re-add the scaleup/down cmdline for next redeploy test (canary=none) + if (env.SCALEUPDOWN == 'scaleup') { + cluster_vars_override += ["${env.BUILDENV}": [hosttype_vars: [sys: [vms_by_az: [b: 1, c: 1]]]]] // AZ 'c' is not set normally + } else if (env.SCALEUPDOWN == 'scaledown') { + cluster_vars_override += ["${env.BUILDENV}": [hosttype_vars: [sys: [vms_by_az: [b: 0, c: 0]]]]] // AZ 'b' is set normally + } + stageBuild.userParams.put("cluster_vars_override", "\\\'" + groovy.json.JsonOutput.toJson(cluster_vars_override).replace("\"", "\\\"") + "\\\'") + } + + // Run the canary=none redeploy + stageBuild.userParams.put("release_version", "3_0_0") + stage_cvops('redeploy canary=none (tidy_on_success)', stageBuild, { + build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${env.BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: env.DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'redeploy'), string(name: 'REDEPLOY_SCHEME', value: (env.REDEPLOY_SCHEME ? env.REDEPLOY_SCHEME : '')), string(name: 'CANARY', value: 'none'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: true), string(name: 'MYHOSTTYPES', value: ''), string(name: 'CV_GIT_URL', value: scmVars ? 
scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.getUserParamsString())] + }) + } else { + stage_cvops('Redeploy not requested', stageBuild, { + echo "Redeploy testing not requested" + }) + } + + stage_cvops('deploy on top', stageBuild, { + build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${env.BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: env.DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'deploy'), string(name: 'REDEPLOY_SCHEME', value: ''), string(name: 'CANARY', value: 'none'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: true), string(name: 'MYHOSTTYPES', value: ''), string(name: 'CV_GIT_URL', value: scmVars ? scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.getUserParamsString())] + }) + + if (stageBuild.result == 'SUCCESS' || params.CLEAN_ON_FAILURE == 'true') { + stage('clean') { + if (stageBuild.result != 'SUCCESS') { + echo "Stage failure: Running clean-up on cluster..." + } + catchError { + build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${env.BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: env.DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'clean'), string(name: 'REDEPLOY_SCHEME', value: ''), string(name: 'CANARY', value: 'none'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: true), string(name: 'MYHOSTTYPES', value: ''), string(name: 'CV_GIT_URL', value: scmVars ? scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.getUserParamsString())] + } + } + } + } } - } } - } - } ]) @@ -264,87 +301,121 @@ CVTEST_NOMYHOSTTYPES = new MatrixBuilder([ // The logic of doing this is different to the matrix without myhosttypes, hence a separate matrix. 
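// For reference, the scale-up/scale-down cluster_vars_override maps used in these matrices are passed to the downstream
// clusterverse-ops job as a single JSON-encoded extra-var. A minimal sketch of the encoding (the 'sys' hosttype and
// 'dev' buildenv values here are illustrative only):
//   HashMap override = [image: "{{_ubuntu2004image}}", dev: [hosttype_vars: [sys: [vms_by_az: [b: 1, c: 1]]]]]
//   String extraVar = "-e cluster_vars_override=" + "\\'" + groovy.json.JsonOutput.toJson(override).replace("\"", "\\\"") + "\\'"
//   // yields: -e cluster_vars_override=\'{\"image\":\"{{_ubuntu2004image}}\",\"dev\":{\"hosttype_vars\":{\"sys\":{\"vms_by_az\":{\"b\":1,\"c\":1}}}}}\'
//   // NOTE: as in the stages above, the value must contain no spaces.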
/**************************************************************************************/ CVTEST_MYHOSTTYPES = new MatrixBuilder([ - jenkinsParamsCopy: params, - clJenkinsParamsMutate: { jenkinsParamsCopy -> - jenkinsParamsCopy.remove('MYHOSTTYPES_LIST') - jenkinsParamsCopy.remove('MYHOSTTYPES_TEST') - jenkinsParamsCopy.remove('MYHOSTTYPES_SERIAL_PARALLEL') - jenkinsParamsCopy.remove('CLEAN_ON_FAILURE') - return jenkinsParamsCopy - }, - clMatrixAxesFilter: { axis -> - !(params.DNS_TEST == 'both' && axis['DNS_FORCE_DISABLE'] == 'true' && axis['CLOUD_REGION'] == 'esxifree/dougalab') && - !(axis['IMAGE_TESTED'] != '_ubuntu2004image' && axis['CLOUD_REGION'] == 'esxifree/dougalab') && - !(axis['REDEPLOY_SCHEME'] == '_scheme_addallnew_rmdisk_rollback') - }, - clTaskMap: { axisEnvVars -> - node { - withEnv(axisEnvVars) { - withCredentials([string(credentialsId: "VAULT_PASSWORD_${env.BUILDENV.toUpperCase()}", variable: 'VAULT_PASSWORD_BUILDENV')]) { - env.VAULT_PASSWORD_BUILDENV = VAULT_PASSWORD_BUILDENV - } - sh 'printenv | sort' - def stageBuild = new cStageBuild([result: 'SUCCESS']) - - if (env.IMAGE_TESTED) { - stageBuild.userParams.put("cluster_vars_override", "\\\'{\\\"image\\\":\\\"{{${env.IMAGE_TESTED}}}\\\"}\\\'") //NOTE: NO SPACES are allowed in this!! - } - - if (env.REDEPLOY_SCHEME) { - if (params.MYHOSTTYPES_LIST == '') { - currentBuild.result = 'FAILURE' - stageBuild.result = 'FAILURE' - unstable('Stage failed! Error was: ' + err) // OR: 'error "Stage failure"' or 'throw new org.jenkinsci.plugins.workflow.steps.FlowInterruptedException(hudson.model.Result.FAILURE)', but both of these fail all future stages, preventing us calling the clean. - } - - stageBuild.userParams.put("skip_release_version_check", "true") - stageBuild.userParams.put("release_version", "1_0_0") - stage_cvops('deploy', stageBuild, { - build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: env.DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'deploy'), string(name: 'REDEPLOY_SCHEME', value: ''), string(name: 'CANARY', value: 'none'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: true), string(name: 'MYHOSTTYPES', value: ''), string(name: 'CV_GIT_URL', value: scmVars ? scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.userParamsString())] - }) - - // Run the split redeploy over all hosttypes - stageBuild.userParams.put("release_version", "2_0_0") - params.MYHOSTTYPES_LIST.split(',').each({ my_host_type -> - stage_cvops("redeploy canary=start ($my_host_type)", stageBuild, { - build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${env.BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'redeploy'), string(name: 'REDEPLOY_SCHEME', value: (env.REDEPLOY_SCHEME ? 
env.REDEPLOY_SCHEME : '')), string(name: 'CANARY', value: 'start'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: false), string(name: 'MYHOSTTYPES', value: my_host_type), string(name: 'CV_GIT_URL', value: scmVars ? scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.userParamsString())] - }) - - stage_cvops("redeploy canary=finish ($my_host_type)", stageBuild, { - build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${env.BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'redeploy'), string(name: 'REDEPLOY_SCHEME', value: (env.REDEPLOY_SCHEME ? env.REDEPLOY_SCHEME : '')), string(name: 'CANARY', value: 'finish'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: false), string(name: 'MYHOSTTYPES', value: my_host_type), string(name: 'CV_GIT_URL', value: scmVars ? scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.userParamsString())] - }) - - stage_cvops("redeploy canary=tidy ($my_host_type)", stageBuild, { - build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${env.BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'redeploy'), string(name: 'REDEPLOY_SCHEME', value: (env.REDEPLOY_SCHEME ? env.REDEPLOY_SCHEME : '')), string(name: 'CANARY', value: 'tidy'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: false), string(name: 'MYHOSTTYPES', value: my_host_type), string(name: 'CV_GIT_URL', value: scmVars ? scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.userParamsString())] - }) - }) - - // Run the mono-redeploy over all hosttypes - stageBuild.userParams.put("release_version", "3_0_0") - params.MYHOSTTYPES_LIST.split(',').each({ my_host_type -> - stage_cvops("redeploy canary=none ($my_host_type) (tidy_on_success)", stageBuild, { - build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${env.BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: env.DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'redeploy'), string(name: 'REDEPLOY_SCHEME', value: (env.REDEPLOY_SCHEME ? env.REDEPLOY_SCHEME : '')), string(name: 'CANARY', value: 'none'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: true), string(name: 'MYHOSTTYPES', value: my_host_type), string(name: 'CV_GIT_URL', value: scmVars ? 
scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.userParamsString())] - }) - }) - - if (stageBuild.result == 'SUCCESS' || params.CLEAN_ON_FAILURE == 'true') { - stage('clean') { - if (stageBuild.result != 'SUCCESS') { - echo "Stage failure: Running clean-up on cluster..." - } - catchError { - build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${env.BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: env.DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'clean'), string(name: 'REDEPLOY_SCHEME', value: ''), string(name: 'CANARY', value: 'none'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: true), string(name: 'MYHOSTTYPES', value: ''), string(name: 'CV_GIT_URL', value: scmVars ? scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.userParamsString())] + jenkinsParams: params, + clJenkinsParamsMutate: { jenkinsParams -> + jenkinsParams.remove('MYHOSTTYPES_LIST') + jenkinsParams.remove('MYHOSTTYPES_TEST') + jenkinsParams.remove('MYHOSTTYPES_SERIAL_PARALLEL') + jenkinsParams.remove('CLEAN_ON_FAILURE') + return jenkinsParams + }, + clMatrixAxesFilter: { axis -> + !(params.DNS_TEST == 'both' && axis['DNS_FORCE_DISABLE'] == 'true' && axis['CLOUD_REGION'] == 'esxifree/dougalab') && /* DNS is not supported in dougalab */ + !(axis['IMAGE_TESTED'] != '_ubuntu2004image' && axis['CLOUD_REGION'] == 'esxifree/dougalab') && /* Only _ubuntu2004image is supported in dougalab */ + !(axis['REDEPLOY_SCHEME'] == '_scheme_addallnew_rmdisk_rollback') && /* _scheme_addallnew_rmdisk_rollback is not supported with myhostttpes set */ + !(axis['REDEPLOY_SCHEME'] == '_scheme_rmvm_keepdisk_rollback' && axis['CLOUD_REGION'].startsWith('azure/')) && /* _scheme_rmvm_keepdisk_rollback not supported in Azure */ + !(axis['REDEPLOY_SCHEME'] == '_scheme_rmvm_rmdisk_only' && axis['SCALEUPDOWN'] == 'scaledown') && /* _scheme_rmvm_rmdisk_only only supports scaling up */ + !(axis['REDEPLOY_SCHEME'] == '_scheme_rmvm_keepdisk_rollback' && (axis['SCALEUPDOWN'] == 'scaledown' || axis['SCALEUPDOWN'] == 'scaleup')) /* _scheme_rmvm_keepdisk_rollback does not support scaling */ + }, + clTaskMap: { axisEnvVars -> + node { + withEnv(axisEnvVars) { + withCredentials([string(credentialsId: "VAULT_PASSWORD_${env.BUILDENV.toUpperCase()}", variable: 'VAULT_PASSWORD_BUILDENV')]) { + env.VAULT_PASSWORD_BUILDENV = VAULT_PASSWORD_BUILDENV + } + sh 'printenv | sort' + def stageBuild = new cStageBuild([result: 'SUCCESS']) + HashMap cluster_vars_override = [:] + + if (env.IMAGE_TESTED) { + cluster_vars_override += [image: "{{${env.IMAGE_TESTED}}}"] + stageBuild.userParams.put("cluster_vars_override", "\\\'" + groovy.json.JsonOutput.toJson(cluster_vars_override).replace("\"", "\\\"") + "\\\'") //NOTE: NO SPACES are allowed in this!! + } + + if (env.REDEPLOY_SCHEME) { + if (params.MYHOSTTYPES_LIST == '') { + currentBuild.result = 'FAILURE' + stageBuild.result = 'FAILURE' + unstable('Stage failed! 
Error was: ' + err) // OR: 'error "Stage failure"' or 'throw new org.jenkinsci.plugins.workflow.steps.FlowInterruptedException(hudson.model.Result.FAILURE)', but both of these fail all future stages, preventing us calling the clean. + } + + stageBuild.userParams.put("skip_release_version_check", "true") + stageBuild.userParams.put("release_version", "1_0_0") + stage_cvops('deploy', stageBuild, { + build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: env.DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'deploy'), string(name: 'REDEPLOY_SCHEME', value: ''), string(name: 'CANARY', value: 'none'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: true), string(name: 'MYHOSTTYPES', value: ''), string(name: 'CV_GIT_URL', value: scmVars ? scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.getUserParamsString())] + }) + + // Update the clustervars with new scaled cluster size + if (env.SCALEUPDOWN == 'scaleup') { + cluster_vars_override += ["${env.BUILDENV}": [hosttype_vars: [sys: [vms_by_az: [b: 1, c: 1]]]]] // AZ 'c' is not set normally + } else if (env.SCALEUPDOWN == 'scaledown') { + cluster_vars_override += ["${env.BUILDENV}": [hosttype_vars: [sys: [vms_by_az: [b: 0, c: 0]]]]] // AZ 'b' is set normally + } + + if (cluster_vars_override.size()) { + stageBuild.userParams.put("cluster_vars_override", "\\\'" + groovy.json.JsonOutput.toJson(cluster_vars_override).replace("\"", "\\\"") + "\\\'") //NOTE: NO SPACES are allowed in this!! + } + + // Run the split redeploy over all hosttypes + stageBuild.userParams.put("release_version", "2_0_0") + params.MYHOSTTYPES_LIST.split(',').each({ my_host_type -> + stage_cvops("redeploy canary=start ($my_host_type)", stageBuild, { + build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${env.BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: env.DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'redeploy'), string(name: 'REDEPLOY_SCHEME', value: (env.REDEPLOY_SCHEME ? env.REDEPLOY_SCHEME : '')), string(name: 'CANARY', value: 'start'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: false), string(name: 'MYHOSTTYPES', value: my_host_type), string(name: 'CV_GIT_URL', value: scmVars ? 
scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.getUserParamsString())] + }) + + stage_cvops("redeploy canary=finish ($my_host_type)", stageBuild, { + build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${env.BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: env.DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'redeploy'), string(name: 'REDEPLOY_SCHEME', value: (env.REDEPLOY_SCHEME ? env.REDEPLOY_SCHEME : '')), string(name: 'CANARY', value: 'finish'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: false), string(name: 'MYHOSTTYPES', value: my_host_type), string(name: 'CV_GIT_URL', value: scmVars ? scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.getUserParamsString())] + }) + + stage_cvops("redeploy canary=tidy ($my_host_type)", stageBuild, { + build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${env.BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: env.DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'redeploy'), string(name: 'REDEPLOY_SCHEME', value: (env.REDEPLOY_SCHEME ? env.REDEPLOY_SCHEME : '')), string(name: 'CANARY', value: 'tidy'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: false), string(name: 'MYHOSTTYPES', value: my_host_type), string(name: 'CV_GIT_URL', value: scmVars ? scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.getUserParamsString())] + }) + }) + + //Need to redeploy original cluster (without scaling), to test that scaling works with next redeploy test (canary=none) + if (env.SCALEUPDOWN == 'scaleup' || env.SCALEUPDOWN == 'scaledown') { + cluster_vars_override.remove("${env.BUILDENV}") + stageBuild.userParams.put("cluster_vars_override", "\\\'" + groovy.json.JsonOutput.toJson(cluster_vars_override).replace("\"", "\\\"") + "\\\'") + stageBuild.userParams.put("release_version", "2_5_0") + stage_cvops('deploy clean original (unscaled) for next test', stageBuild, { + build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: env.DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'deploy'), string(name: 'REDEPLOY_SCHEME', value: ''), string(name: 'CANARY', value: 'none'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: true), string(name: 'MYHOSTTYPES', value: ''), string(name: 'CV_GIT_URL', value: scmVars ? 
scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.getUserParamsString() + " -e clean=_all_")] + }) + + //Re-add the scaleup/down cmdline for next redeploy test (canary=none) + if (env.SCALEUPDOWN == 'scaleup') { + cluster_vars_override += ["${env.BUILDENV}": [hosttype_vars: [sys: [vms_by_az: [b: 1, c: 1]]]]] // AZ 'c' is not set normally + } else if (env.SCALEUPDOWN == 'scaledown') { + cluster_vars_override += ["${env.BUILDENV}": [hosttype_vars: [sys: [vms_by_az: [b: 0, c: 0]]]]] // AZ 'b' is set normally + } + stageBuild.userParams.put("cluster_vars_override", "\\\'" + groovy.json.JsonOutput.toJson(cluster_vars_override).replace("\"", "\\\"") + "\\\'") + } + + // Run the canary=none redeploy over all hosttypes + stageBuild.userParams.put("release_version", "3_0_0") + params.MYHOSTTYPES_LIST.split(',').each({ my_host_type -> + stage_cvops("redeploy canary=none ($my_host_type) (tidy_on_success)", stageBuild, { + build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${env.BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: env.DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'redeploy'), string(name: 'REDEPLOY_SCHEME', value: (env.REDEPLOY_SCHEME ? env.REDEPLOY_SCHEME : '')), string(name: 'CANARY', value: 'none'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: true), string(name: 'MYHOSTTYPES', value: my_host_type), string(name: 'CV_GIT_URL', value: scmVars ? scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.getUserParamsString())] + }) + }) + + if (stageBuild.result == 'SUCCESS' || params.CLEAN_ON_FAILURE == 'true') { + stage('clean') { + if (stageBuild.result != 'SUCCESS') { + echo "Stage failure: Running clean-up on cluster..." + } + catchError { + build job: 'clusterverse/clusterverse-ops', parameters: [string(name: 'APP_NAME', value: "cvtest-${env.BUILD_NUMBER}-${env.BUILD_HASH}"), string(name: 'CLOUD_REGION', value: env.CLOUD_REGION), string(name: 'BUILDENV', value: env.BUILDENV), string(name: 'CLUSTER_ID', value: env.CLUSTER_ID), booleanParam(name: 'DNS_FORCE_DISABLE', value: env.DNS_FORCE_DISABLE), string(name: 'DEPLOY_TYPE', value: 'clean'), string(name: 'REDEPLOY_SCHEME', value: ''), string(name: 'CANARY', value: 'none'), booleanParam(name: 'CANARY_TIDY_ON_SUCCESS', value: true), string(name: 'MYHOSTTYPES', value: ''), string(name: 'CV_GIT_URL', value: scmVars ? 
scmVars.getUserRemoteConfigs()[0].getUrl() : DEFAULT_CLUSTERVERSE_URL), string(name: 'CV_GIT_BRANCH', value: CV_OPS_GIT_BRANCH), string(name: 'USER_CMDLINE_VARS', value: stageBuild.getUserParamsString())] + } + } + } + } else { + stage_cvops('Redeploy not requested', stageBuild, { + echo "Redeploy testing not requested" + }) + } } - } } - } else { - stage_cvops('Redeploy not requested', stageBuild, { - echo "Redeploy testing not requested" - }) - } } - } - } ]) @@ -354,60 +425,60 @@ CVTEST_MYHOSTTYPES = new MatrixBuilder([ // A check stage - no actual work stage('Check Environment') { - node { - sh 'printenv | sort' - println(params.inspect()) - if (params.BUILDENV == '') { -// currentBuild.result = 'ABORTED' -// error "BUILDENV not defined" - unstable("BUILDENV not defined") - throw new org.jenkinsci.plugins.workflow.steps.FlowInterruptedException(hudson.model.Result.ABORTED) + node { + sh 'printenv | sort' + println(params.inspect()) + if (params.BUILDENV == '') { +// currentBuild.result = 'ABORTED' +// error "BUILDENV not defined" + unstable("BUILDENV not defined") + throw new org.jenkinsci.plugins.workflow.steps.FlowInterruptedException(hudson.model.Result.ABORTED) + } } - } } // A map to be loaded with matrices (of stages) HashMap matrixBuilds = [:] -// A 'self-test' matrix. Only outputs debug. +//// A 'self-test' matrix. Only outputs debug. //matrixBuilds["SELFTEST1 Matrix builds"] = { -// stage("SELFTEST Matrix builds") { -// echo("Matrix 'params' used to build Matrix axes: \n" + SELFTEST._getMatrixParams().inspect() + "\n") -// echo("Matrix axes: \n" + SELFTEST._getMatrixAxes().inspect() + "\n") -// parallel(SELFTEST.getTaskMap()) -// } +// stage("SELFTEST Matrix builds") { +// echo("Matrix 'params' used to build Matrix axes: \n" + SELFTEST._getMatrixParams().inspect() + "\n") +// echo("Matrix axes: \n" + SELFTEST._getMatrixAxes().inspect() + "\n") +// parallel(SELFTEST.getTaskMap()) +// } //} // A matrix of tests that test pipelines *without* myhosttypes configured if (params.MYHOSTTYPES_TEST.split(',').contains('nomyhosttypes')) { - matrixBuilds["NOMYHOSTTYPES Matrix builds"] = { - stage("NOMYHOSTTYPES Matrix builds") { - echo("Matrix 'params' used to build Matrix axes: \n" + CVTEST_NOMYHOSTTYPES._getMatrixParams().inspect() + "\n") - echo("Matrix axes: \n" + CVTEST_NOMYHOSTTYPES._getMatrixAxes().inspect() + "\n") - parallel(CVTEST_NOMYHOSTTYPES.getTaskMap()) + matrixBuilds["NOMYHOSTTYPES Matrix builds"] = { + stage("NOMYHOSTTYPES Matrix builds") { + echo("Matrix 'params' used to build Matrix axes: \n" + CVTEST_NOMYHOSTTYPES._getMatrixParams().inspect() + "\n") + echo("Matrix axes: \n" + CVTEST_NOMYHOSTTYPES._getMatrixAxes().inspect() + "\n") + parallel(CVTEST_NOMYHOSTTYPES.getTaskMap()) + } } - } } // A matrix of tests that test pipelines *with* myhosttypes configured if (params.MYHOSTTYPES_TEST.split(',').contains('myhosttypes')) { - matrixBuilds["MYHOSTTYPES Matrix builds"] = { - stage("MYHOSTTYPES Matrix builds") { - echo("Matrix 'params' used to build Matrix axes: \n" + CVTEST_MYHOSTTYPES._getMatrixParams().inspect() + "\n") - echo("Matrix axes: \n" + CVTEST_MYHOSTTYPES._getMatrixAxes().inspect() + "\n") - parallel(CVTEST_MYHOSTTYPES.getTaskMap()) + matrixBuilds["MYHOSTTYPES Matrix builds"] = { + stage("MYHOSTTYPES Matrix builds") { + echo("Matrix 'params' used to build Matrix axes: \n" + CVTEST_MYHOSTTYPES._getMatrixParams().inspect() + "\n") + echo("Matrix axes: \n" + CVTEST_MYHOSTTYPES._getMatrixAxes().inspect() + "\n") + parallel(CVTEST_MYHOSTTYPES.getTaskMap()) + } } - 
} }
 }
 
 // Run the matrices in parallel if the MYHOSTTYPES_SERIAL_PARALLEL parameter is set to 'parallel' (makes a mess in Blue Ocean, but is faster). Otherwise run them serially.
 if (params.MYHOSTTYPES_SERIAL_PARALLEL == 'parallel') {
-  stage("All matrices") {
-    parallel(matrixBuilds)
-  }
+    stage("All matrices") {
+        parallel(matrixBuilds)
+    }
 } else {
-  matrixBuilds.each { matrix ->
-    matrix.value.call()
-  }
+    matrixBuilds.each { matrix ->
+        matrix.value.call()
+    }
 }
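
For reference, a minimal standalone sketch of the cluster_vars_override escaping used in the pipeline stages above (plain Groovy outside Jenkins; the 'sandbox' buildenv and the map contents are hypothetical examples, not values from this patch). groovy.json.JsonOutput.toJson() emits compact JSON with no spaces, the double-quotes are backslash-escaped, and the result is wrapped in escaped single-quotes so the whole override can be passed as one token inside USER_CMDLINE_VARS:

import groovy.json.JsonOutput

// Hypothetical override: add AZ 'c' to the 'sys' hosttype in the 'sandbox' buildenv
def cluster_vars_override = [sandbox: [hosttype_vars: [sys: [vms_by_az: [b: 1, c: 1]]]]]

// Compact JSON, no spaces: {"sandbox":{"hosttype_vars":{"sys":{"vms_by_az":{"b":1,"c":1}}}}}
def json = JsonOutput.toJson(cluster_vars_override)

// Escape double-quotes and wrap in escaped single-quotes, as the stages above do,
// so the JSON survives being embedded in the USER_CMDLINE_VARS string parameter.
def escaped = "\\\'" + json.replace("\"", "\\\"") + "\\\'"

println("-e cluster_vars_override=" + escaped)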