diff --git a/playbooks/configure.yaml b/playbooks/configure.yaml
index f01e544..2767bc1 100644
--- a/playbooks/configure.yaml
+++ b/playbooks/configure.yaml
@@ -3,20 +3,15 @@
 - hosts: localhost
   connection: local
   roles:
-  - instance-groups
+  - gce-instance-groups
 
 - hosts: cluster_hosts
   roles:
-  - cluster-variables
-  - docker-storage-setup
+  - gce-cluster-variables
+  - gce-docker-storage-setup
   - gce-cloudconfig
   - frequent-log-rotation
 
-- hosts: schedulable_nodes
-  gather_facts: no
-  roles:
-  - openshift-volume-quota
-
 # 3.7 requires std_include in order to invoke os_firewall (for now). Conditionally include it so
 # we don't break for older versions. Should be removed when os_firewall becomes a module.
 - hosts: localhost
@@ -48,6 +43,24 @@
       dest: "/tmp/"
       flat: yes
 
+- hosts: primary_master
+  gather_facts: no
+  roles:
+  - openshift-roles
+  - master-validate
+
+- hosts: localhost
+  tasks:
+  - name: Validate the public address from outside the cluster
+    uri:
+      url: "https://{{ openshift_master_cluster_public_hostname }}:{{ console_port }}/healthz/ready"
+      validate_certs: False
+      method: GET
+    register: resp
+    until: resp.status == 200
+    retries: 6
+    delay: 5
+
 - hosts: infra_nodes
   gather_facts: no
   roles:
@@ -59,24 +72,5 @@
 - hosts: app_nodes
   gather_facts: no
   roles:
-  - restrict-gce-metadata
-
-- hosts: schedulable_nodes
-  gather_facts: no
-  roles:
-  - openshift-emptydir-quota
-
-- hosts: primary_master
-  gather_facts: no
-  roles:
-  - openshift-registry
-  - openshift-roles
-
-- hosts: masters
-  gather_facts: no
-  roles:
-  - validate-masters
-
-- hosts: localhost
-  roles:
-  - validate-public
+  - gce-node-restrict-metadata
+  - node-emptydir-quota
diff --git a/playbooks/group_vars/all/00_playbook_defaults.yaml b/playbooks/group_vars/all/00_playbook_defaults.yaml
index f380965..173c4e7 120000
--- a/playbooks/group_vars/all/00_playbook_defaults.yaml
+++ b/playbooks/group_vars/all/00_playbook_defaults.yaml
@@ -1 +1 @@
-../../roles/cluster-variables/vars/main.yaml
\ No newline at end of file
+../../roles/gce-cluster-variables/vars/main.yaml
\ No newline at end of file
diff --git a/playbooks/image.yaml b/playbooks/image.yaml
index edac25e..a8b0e38 100644
--- a/playbooks/image.yaml
+++ b/playbooks/image.yaml
@@ -74,16 +74,10 @@
       state: present
     when: ansible_os_family == "RedHat"
 
-# GCE instances are starting with xfs AND barrier=1, which is only for extfs.
-- name: Verify fstab entries are correct for XFS volumes
-  hosts: build_instance_ips
-  tasks:
-  - name: Remove barrier=1 from XFS fstab entries
-    command: sed -i -e 's/xfs\(.*\)barrier=1/xfs\1/g; s/, / /g' /etc/fstab
-
 - name: Build image
   hosts: build_instance_ips
   roles:
+  - role: gce-image-configure
   - role: os_update_latest
   post_tasks:
   - name: Disable all repos on RHEL
@@ -112,12 +106,6 @@
       warn: no
     when: ansible_os_family == "RedHat"
 
-#- name: Install and configure image
-#  hosts: build_instance_ips
-#  tasks:
-#  #- yum: name=* state=latest update_cache=yes
-#  #  when: ansible_os_family == "RedHat"
-
 - name: Commit image
   hosts: localhost
   connection: local
diff --git a/playbooks/inventory.yaml b/playbooks/inventory.yaml
index bfd6a3d..9eb1add 100644
--- a/playbooks/inventory.yaml
+++ b/playbooks/inventory.yaml
@@ -3,4 +3,4 @@
   connection: local
   gather_facts: no
   roles:
-  - dynamic-inventory
+  - gce-dynamic-inventory
diff --git a/playbooks/launch.yaml b/playbooks/launch.yaml
index 819e816..170ce95 100644
--- a/playbooks/launch.yaml
+++ b/playbooks/launch.yaml
@@ -5,8 +5,8 @@
   connection: local
   gather_facts: no
   roles:
-  - provision
-  - dynamic-inventory
+  - gce-provision
+  - gce-dynamic-inventory
 
 - hosts: localhost
   tasks:
diff --git a/playbooks/roles/cluster-variables/tasks/main.yaml b/playbooks/roles/gce-cluster-variables/tasks/main.yaml
similarity index 57%
rename from playbooks/roles/cluster-variables/tasks/main.yaml
rename to playbooks/roles/gce-cluster-variables/tasks/main.yaml
index 7982070..7e18566 100644
--- a/playbooks/roles/cluster-variables/tasks/main.yaml
+++ b/playbooks/roles/gce-cluster-variables/tasks/main.yaml
@@ -1,3 +1,8 @@
+- name: Set the full path to the gcs_registry_keyfile as a fact
+  set_fact:
+    openshift_hosted_registry_storage_gcs_keyfile: "{{ playbook_dir + '/files/' + gcs_registry_keyfile }}"
+  when: gcs_registry_keyfile is defined
+
 - include_vars:
     dir: "{{ playbook_dir }}/group_vars/all"
     name: _cluster_variables
diff --git a/playbooks/roles/cluster-variables/vars/main.yaml b/playbooks/roles/gce-cluster-variables/vars/main.yaml
similarity index 96%
rename from playbooks/roles/cluster-variables/vars/main.yaml
rename to playbooks/roles/gce-cluster-variables/vars/main.yaml
index 52ff824..594b4c8 100644
--- a/playbooks/roles/cluster-variables/vars/main.yaml
+++ b/playbooks/roles/gce-cluster-variables/vars/main.yaml
@@ -18,6 +18,8 @@ openshift_master_cluster_public_hostname: "openshift-master.{{ public_hosted_zon
 openshift_master_default_subdomain: "{{ wildcard_zone }}"
 osm_default_node_selector: "role=app"
 openshift_deployment_type: origin
+openshift_hosted_registry_storage_provider: gcs
+
 openshift_master_identity_providers:
 - name: google
   kind: GoogleIdentityProvider
diff --git a/playbooks/roles/docker-storage-setup/tasks/main.yaml b/playbooks/roles/gce-docker-storage-setup/tasks/main.yaml
similarity index 82%
rename from playbooks/roles/docker-storage-setup/tasks/main.yaml
rename to playbooks/roles/gce-docker-storage-setup/tasks/main.yaml
index 0f76504..54fc39f 100644
--- a/playbooks/roles/docker-storage-setup/tasks/main.yaml
+++ b/playbooks/roles/gce-docker-storage-setup/tasks/main.yaml
@@ -2,4 +2,4 @@
 - name: create the docker-storage-setup config file
   template: src=docker-storage-setup.j2 dest=/etc/sysconfig/docker-storage-setup owner=root group=root mode=0644
 - name: start docker
-  service: name=docker state=started
+  service: name=docker state=restarted
diff --git a/playbooks/roles/docker-storage-setup/templates/docker-storage-setup.j2 b/playbooks/roles/gce-docker-storage-setup/templates/docker-storage-setup.j2
similarity index 88%
rename from playbooks/roles/docker-storage-setup/templates/docker-storage-setup.j2
rename to playbooks/roles/gce-docker-storage-setup/templates/docker-storage-setup.j2
index ad7230d..aa45921 100644
--- a/playbooks/roles/docker-storage-setup/templates/docker-storage-setup.j2
+++ b/playbooks/roles/gce-docker-storage-setup/templates/docker-storage-setup.j2
@@ -1,9 +1,8 @@
-DEVS=/dev/sdb
-VG=docker-vol
-
-{% set storagedriver = provision_gce_docker_storage_driver | default('devicemapper') %}
+{% set storagedriver = provision_gce_docker_storage_driver | default('overlay2') %}
 STORAGE_DRIVER="{{ storagedriver }}"
 {% if storagedriver == 'devicemapper' %}
+DEVS=/dev/sda
+VG=docker-vol
 DATA_SIZE=95%VG
 EXTRA_DOCKER_STORAGE_OPTIONS="--storage-opt dm.basesize=3G"
 {% endif %}
diff --git a/playbooks/roles/dynamic-inventory/tasks/main.yaml b/playbooks/roles/gce-dynamic-inventory/tasks/main.yaml
similarity index 100%
rename from playbooks/roles/dynamic-inventory/tasks/main.yaml
rename to playbooks/roles/gce-dynamic-inventory/tasks/main.yaml
diff --git a/playbooks/roles/dynamic-inventory/templates/inventory.j2.sh b/playbooks/roles/gce-dynamic-inventory/templates/inventory.j2.sh
similarity index 100%
rename from playbooks/roles/dynamic-inventory/templates/inventory.j2.sh
rename to playbooks/roles/gce-dynamic-inventory/templates/inventory.j2.sh
diff --git a/playbooks/roles/gce-image-configure/files/partition.conf b/playbooks/roles/gce-image-configure/files/partition.conf
new file mode 100644
index 0000000..76e65ab
--- /dev/null
+++ b/playbooks/roles/gce-image-configure/files/partition.conf
@@ -0,0 +1,3 @@
+[Service]
+ExecStartPost=-/usr/bin/growpart /dev/sda 1
+ExecStartPost=-/sbin/xfs_growfs /
\ No newline at end of file
diff --git a/playbooks/roles/gce-image-configure/tasks/main.yaml b/playbooks/roles/gce-image-configure/tasks/main.yaml
new file mode 100644
index 0000000..32b106b
--- /dev/null
+++ b/playbooks/roles/gce-image-configure/tasks/main.yaml
@@ -0,0 +1,10 @@
+# GCE instances are starting with xfs AND barrier=1, which is only for extfs.
+- name: Remove barrier=1 from XFS fstab entries
+  command: sed -i -e 's/xfs\(.*\)barrier=1/xfs\1/g; s/, / /g' /etc/fstab
+
+- name: Ensure the root filesystem has XFS group quota turned on
+  command: sed -i -e 's/linux16 \(.*\)$/linux16 \1 rootflags=gquota/g' /boot/grub2/grub.cfg
+
+- name: Ensure the root partition grows on startup
+  copy: src=partition.conf dest=/etc/systemd/system/google-instance-setup.service.d/
+
diff --git a/playbooks/roles/instance-groups/tasks/main.yaml b/playbooks/roles/gce-instance-groups/tasks/main.yaml
similarity index 100%
rename from playbooks/roles/instance-groups/tasks/main.yaml
rename to playbooks/roles/gce-instance-groups/tasks/main.yaml
diff --git a/playbooks/roles/restrict-gce-metadata/tasks/main.yaml b/playbooks/roles/gce-node-restrict-metadata/tasks/main.yaml
similarity index 100%
rename from playbooks/roles/restrict-gce-metadata/tasks/main.yaml
rename to playbooks/roles/gce-node-restrict-metadata/tasks/main.yaml
diff --git a/playbooks/roles/openshift-volume-quota/tasks/main.yaml b/playbooks/roles/gce-node-volume-quota/tasks/main.yaml
similarity index 100%
rename from playbooks/roles/openshift-volume-quota/tasks/main.yaml
rename to playbooks/roles/gce-node-volume-quota/tasks/main.yaml
diff --git a/playbooks/roles/deprovision/tasks/main.yaml b/playbooks/roles/gce-provision-remove/tasks/main.yaml
similarity index 100%
rename from playbooks/roles/deprovision/tasks/main.yaml
rename to playbooks/roles/gce-provision-remove/tasks/main.yaml
diff --git a/playbooks/roles/deprovision/templates/deprovision.j2.sh b/playbooks/roles/gce-provision-remove/templates/deprovision.j2.sh
similarity index 58%
rename from playbooks/roles/deprovision/templates/deprovision.j2.sh
rename to playbooks/roles/gce-provision-remove/templates/deprovision.j2.sh
index 190c984..4d52bc9 100644
--- a/playbooks/roles/deprovision/templates/deprovision.j2.sh
+++ b/playbooks/roles/gce-provision-remove/templates/deprovision.j2.sh
@@ -2,12 +2,7 @@
 
 set -euo pipefail
 
-# Bucket for registry
-if gsutil ls -p "{{ gce_project_id }}" "gs://{{ provision_gce_registry_gcs_bucket }}" &>/dev/null; then
-  gsutil -m rm -r "gs://{{ provision_gce_registry_gcs_bucket }}"
-fi
-
-function teardown() {
+function teardown_cmd() {
   a=( $@ )
   local name=$1
   a=( "${a[@]:1}" )
@@ -28,7 +23,23 @@
   fi
 }
 
+function teardown() {
+  for i in `seq 1 3`; do
+    if teardown_cmd $@; then
+      break
+    fi
+  done
+}
+
+# Bucket for registry
+(
+if gsutil ls -p "{{ gce_project_id }}" "gs://{{ openshift_hosted_registry_storage_gcs_bucket }}" &>/dev/null; then
+  gsutil -m rm -r "gs://{{ openshift_hosted_registry_storage_gcs_bucket }}"
+fi
+) &
+
 # DNS
+(
 dns_zone="{{ dns_managed_zone | default(provision_prefix + 'managed-zone') }}"
 if gcloud --project "{{ gce_project_id }}" dns managed-zones describe "${dns_zone}" &>/dev/null; then
   # Retry DNS changes until they succeed since this may be a shared resource
@@ -59,6 +70,7 @@
     break
   done
 fi
+) &
 
 # Preemptively spin down the instances
 (
@@ -94,73 +106,27 @@ teardown "{{ provision_prefix }}master-network-lb-ip" compute addresses --region
 (
 # Master SSL network rules
 teardown "{{ provision_prefix }}master-ssl-lb-rule" compute forwarding-rules --global
-teardown "{{ provision_prefix }}master-ssl-lb-target" compute target-ssl-proxies
-teardown "{{ provision_prefix }}master-ssl-lb-cert" compute ssl-certificates
+teardown "{{ provision_prefix }}master-ssl-lb-target" compute target-tcp-proxies
 teardown "{{ provision_prefix }}master-ssl-lb-ip" compute addresses --global
 teardown "{{ provision_prefix }}master-ssl-lb-backend" compute backend-services --global
 teardown "{{ provision_prefix }}master-ssl-lb-health-check" compute health-checks
 ) &
 
-# Additional disks for instances for docker storage
-instances=$(gcloud --project "{{ gce_project_id }}" compute instances list --filter='tags.items:{{ provision_prefix }}ocp AND tags.items:ocp' --format='value(name)')
-for i in $instances; do
-  (
-    instance_zone=$(gcloud --project "{{ gce_project_id }}" compute instances list --filter="name:${i}" --format='value(zone)')
-    docker_disk="${i}-docker"
-    if gcloud --project "{{ gce_project_id }}" compute disks describe "$docker_disk" --zone "$instance_zone" &>/dev/null; then
-      if ! gcloud --project "{{ gce_project_id }}" compute instances detach-disk "${i}" --disk "$docker_disk" --zone "$instance_zone"; then
-        echo "warning: Unable to detach docker disk or already detached" 1>&2
-      fi
-    fi
-    openshift_disk="${i}-openshift"
-    if gcloud --project "{{ gce_project_id }}" compute disks describe "$openshift_disk" --zone "$instance_zone" &>/dev/null; then
-      if ! gcloud --project "{{ gce_project_id }}" compute instances detach-disk "${i}" --disk "$openshift_disk" --zone "$instance_zone"; then
-        echo "warning: Unable to detach openshift disk or already detached" 1>&2
-      fi
-    fi
-  ) &
-done
-
 for i in `jobs -p`; do wait $i; done
 
-# Wait for any remaining disks to be detached
-done=
-for i in `seq 1 60`; do
-  if [[ -z "$( gcloud --project "{{ gce_project_id }}" compute operations list --zones "{{ gce_zone_name }}" --filter 'operationType=detachDisk AND NOT status=DONE AND targetLink : "{{ provision_prefix }}ig-"' --page-size=10 --format 'value(targetLink)' --limit 1 )" ]]; then
-    done=1
-    break
-  fi
-  sleep 2
-done
-if [[ -z "${done}" ]]; then
-  echo "Failed to detach disks"
-  exit 1
-fi
-
-# Delete the disks in parallel with instance operations. Ignore failures to avoid preventing other expensive resources from
-# being removed.
-instances=$(gcloud --project "{{ gce_project_id }}" compute instances list --filter='tags.items:{{ provision_prefix }}ocp AND tags.items:ocp' --format='value(name)')
-for i in $instances; do
-  instance_zone=$(gcloud --project "{{ gce_project_id }}" compute instances list --filter="name:${i}" --format='value(zone)')
-  ( gcloud -q --project "{{ gce_project_id }}" compute disks delete "${i}-docker" --zone "$instance_zone" || true ) &
-  ( gcloud -q --project "{{ gce_project_id }}" compute disks delete "${i}-openshift" --zone "$instance_zone" || true ) &
-done
-
-# Instance groups
-( teardown "{{ provision_prefix }}ig-m" compute instance-groups managed --zone "{{ gce_zone_name }}" ) &
-( teardown "{{ provision_prefix }}ig-n" compute instance-groups managed --zone "{{ gce_zone_name }}" ) &
-( teardown "{{ provision_prefix }}ig-i" compute instance-groups managed --zone "{{ gce_zone_name }}" ) &
+{% for node_group in provision_gce_node_groups %}
+# teardown {{ node_group.name }}
+(
+  teardown "{{ provision_prefix }}ig-{{ node_group.suffix }}" compute instance-groups managed --zone "{{ gce_zone_name }}"
+  teardown "{{ provision_prefix }}instance-template-{{ node_group.name }}" compute instance-templates
+) &
+{% endfor %}
 
 for i in `jobs -p`; do wait $i; done
 
-# Instance templates
-( teardown "{{ provision_prefix }}instance-template-master" compute instance-templates ) &
-( teardown "{{ provision_prefix }}instance-template-node" compute instance-templates ) &
-( teardown "{{ provision_prefix }}instance-template-node-infra" compute instance-templates ) &
-
-# Firewall rules
-# ['name']='parameters for "gcloud compute firewall-rules create"'
-# For all possible parameters see: gcloud compute firewall-rules create --help
+#Firewall rules
+#['name']='parameters for "gcloud compute firewall-rules create"'
+#For all possible parameters see: gcloud compute firewall-rules create --help
 declare -A FW_RULES=(
   ['icmp']=""
   ['ssh-external']=""
@@ -173,7 +139,12 @@ declare -A FW_RULES=(
 )
 for rule in "${!FW_RULES[@]}"; do
 ( if gcloud --project "{{ gce_project_id }}" compute firewall-rules describe "{{ provision_prefix }}$rule" &>/dev/null; then
-    gcloud -q --project "{{ gce_project_id }}" compute firewall-rules delete "{{ provision_prefix }}$rule"
+    # retry a few times because this call can be flaky
+    for i in `seq 1 3`; do
+      if gcloud -q --project "{{ gce_project_id }}" compute firewall-rules delete "{{ provision_prefix }}$rule"; then
+        break
+      fi
+    done
 fi ) &
 done
 
diff --git a/playbooks/roles/provision/tasks/main.yaml b/playbooks/roles/gce-provision/tasks/main.yaml
similarity index 100%
rename from playbooks/roles/provision/tasks/main.yaml
rename to playbooks/roles/gce-provision/tasks/main.yaml
diff --git a/playbooks/roles/provision/templates/dns.j2.sh b/playbooks/roles/gce-provision/templates/dns.j2.sh
similarity index 100%
rename from playbooks/roles/provision/templates/dns.j2.sh
rename to playbooks/roles/gce-provision/templates/dns.j2.sh
diff --git a/playbooks/roles/provision/templates/provision.j2.sh b/playbooks/roles/gce-provision/templates/provision.j2.sh
similarity index 65%
rename from playbooks/roles/provision/templates/provision.j2.sh
rename to playbooks/roles/gce-provision/templates/provision.j2.sh
index 4e52d98..e68e968 100644
--- a/playbooks/roles/provision/templates/provision.j2.sh
+++ b/playbooks/roles/gce-provision/templates/provision.j2.sh
@@ -86,8 +86,6 @@ declare -A FW_RULES=(
   ['infra-node-internal']="--allow tcp:5000 --source-tags ocp --target-tags ocp-infra-node"
   ['infra-node-external']="--allow tcp:80,tcp:443,tcp:1936${range} --target-tags ocp-infra-node"
 )
-
-# Create firewall rules
 for rule in "${!FW_RULES[@]}"; do
 ( if ! gcloud --project "{{ gce_project_id }}" compute firewall-rules describe "{{ provision_prefix }}$rule" &>/dev/null; then
     gcloud --project "{{ gce_project_id }}" compute firewall-rules create "{{ provision_prefix }}$rule" --network "{{ gce_network_name }}" ${FW_RULES[$rule]}
@@ -96,6 +94,7 @@ for rule in "${!FW_RULES[@]}"; do
   fi ) &
 done
+
 # Master IP
 ( if ! gcloud --project "{{ gce_project_id }}" compute addresses describe "{{ provision_prefix }}master-ssl-lb-ip" --global &>/dev/null; then
     gcloud --project "{{ gce_project_id }}" compute addresses create "{{ provision_prefix }}master-ssl-lb-ip" --global
@@ -103,7 +102,6 @@ else
   echo "IP '{{ provision_prefix }}master-ssl-lb-ip' already exists"
 fi ) &
-
 # Internal master IP
 ( if ! gcloud --project "{{ gce_project_id }}" compute addresses describe "{{ provision_prefix }}master-network-lb-ip" --region "{{ gce_region_name }}" &>/dev/null; then
     gcloud --project "{{ gce_project_id }}" compute addresses create "{{ provision_prefix }}master-network-lb-ip" --region "{{ gce_region_name }}"
@@ -118,137 +116,57 @@ else
   echo "IP '{{ provision_prefix }}router-network-lb-ip' already exists"
 fi ) &
-for i in `jobs -p`; do wait $i; done
-
-# Create instance templates
-(
-if ! gcloud --project "{{ gce_project_id }}" compute instance-templates describe "{{ provision_prefix }}instance-template-master" &>/dev/null; then
-  gcloud --project "{{ gce_project_id }}" compute instance-templates create "{{ provision_prefix }}instance-template-master" --machine-type "{{ provision_gce_machine_type_master }}" --network "{{ gce_network_name }}" --tags "{{ provision_prefix }}ocp,ocp,ocp-master{{ gce_extra_tags_master }}" --image "${image}" --boot-disk-size "35" --boot-disk-type "pd-ssd" --scopes logging-write,monitoring-write,useraccounts-ro,service-control,service-management,storage-ro,compute-rw ${metadata}
-else
-  echo "Instance template '{{ provision_prefix }}instance-template-master' already exists"
-fi
-
-# Create Master instance group
-if ! gcloud --project "{{ gce_project_id }}" compute instance-groups managed describe "{{ provision_prefix }}ig-m" --zone "{{ gce_zone_name }}" &>/dev/null; then
-  gcloud --project "{{ gce_project_id }}" compute instance-groups managed create "{{ provision_prefix }}ig-m" --zone "{{ gce_zone_name }}" --template "{{ provision_prefix }}instance-template-master" --size "{{ provision_gce_instance_group_size_master }}"
-  gcloud --project "{{ gce_project_id }}" compute instance-groups managed set-named-ports "{{ provision_prefix }}ig-m" --zone "{{ gce_zone_name }}" --named-ports "{{ provision_prefix }}-port-name-master:{{ internal_console_port }}"
-else
-  echo "Instance group '{{ provision_prefix }}ig-m' already exists"
-fi
-) &
 
+{% for node_group in provision_gce_node_groups %}
+# configure {{ node_group.name }}
 (
-if ! gcloud --project "{{ gce_project_id }}" compute instance-templates describe "{{ provision_prefix }}instance-template-node" &>/dev/null; then
-  gcloud --project "{{ gce_project_id }}" compute instance-templates create "{{ provision_prefix }}instance-template-node" --machine-type "{{ provision_gce_machine_type_node }}" --network "{{ gce_network_name }}" --tags "{{ provision_prefix }}ocp,ocp,ocp-node{{ gce_extra_tags_node }}" --image "${image}" --boot-disk-size "25" --boot-disk-type "pd-ssd" --scopes logging-write,monitoring-write,useraccounts-ro,service-control,service-management,storage-ro,compute-rw ${metadata}
-else
-  echo "Instance template '{{ provision_prefix }}instance-template-node' already exists"
-fi
-
-# Create Node instance group
-if ! gcloud --project "{{ gce_project_id }}" compute instance-groups managed describe "{{ provision_prefix }}ig-n" --zone "{{ gce_zone_name }}" &>/dev/null; then
-  gcloud --project "{{ gce_project_id }}" compute instance-groups managed create "{{ provision_prefix }}ig-n" --zone "{{ gce_zone_name }}" --template "{{ provision_prefix }}instance-template-node" --size "{{ provision_gce_instance_group_size_node }}"
-else
-  echo "Instance group '{{ provision_prefix }}ig-n' already exists"
-fi
-) &
-
-(
-if ! gcloud --project "{{ gce_project_id }}" compute instance-templates describe "{{ provision_prefix }}instance-template-node-infra" &>/dev/null; then
-  gcloud --project "{{ gce_project_id }}" compute instance-templates create "{{ provision_prefix }}instance-template-node-infra" --machine-type "{{ provision_gce_machine_type_node_infra }}" --network "{{ gce_network_name }}" --tags "{{ provision_prefix }}ocp,ocp,ocp-infra-node{{ gce_extra_tags_node_infra }}" --image "${image}" --boot-disk-size "25" --boot-disk-type "pd-ssd" --scopes logging-write,monitoring-write,useraccounts-ro,service-control,service-management,storage-rw,compute-rw ${metadata}
-else
-  echo "Instance template '{{ provision_prefix }}instance-template-node-infra' already exists"
-fi
-
-# Create Infra node instance group
-if ! gcloud --project "{{ gce_project_id }}" compute instance-groups managed describe "{{ provision_prefix }}ig-i" --zone "{{ gce_zone_name }}" &>/dev/null; then
-  gcloud --project "{{ gce_project_id }}" compute instance-groups managed create "{{ provision_prefix }}ig-i" --zone "{{ gce_zone_name }}" --template "{{ provision_prefix }}instance-template-node-infra" --size "{{ provision_gce_instance_group_size_node_infra }}"
-else
-  echo "Instance group '{{ provision_prefix }}ig-i' already exists"
-fi
-) &
-
-for i in `jobs -p`; do wait $i; done
-
-# Make attach idempotent and reentrant
-function try_attach_disk() {
-  if ! out=$( gcloud --project "{{ gce_project_id }}" compute instances attach-disk "$1" --disk "$2" --zone "$3" 2>&1 ); then
-    if [[ "${out}" == *"is already being used by"* ]]; then
-      echo "Disk '$2' already attached"
-      return 0
-    fi
-    echo "${out}" 1>&2
-    return 1
-  fi
-  # TODO: identify whether we should turn on auto-delete
-  # gcloud --project "{{ gce_project_id }}" compute instances set-disk-auto-delete "$1" --disk "$2" --zone "$3" --auto-delete
-}
-
-# Attach additional disks to instances for docker storage
-# TODO: do we actually want multiple disks?make
-instances=$(gcloud --project "{{ gce_project_id }}" compute instances list --filter='tags.items:{{ provision_prefix }}ocp AND tags.items:ocp' --format='value(name)')
-for i in $instances; do
-  (
-    instance_zone=$(gcloud --project "{{ gce_project_id }}" compute instances list --filter="name:${i}" --format='value(zone)')
-    docker_disk="${i}-docker"
-    if ! gcloud --project "{{ gce_project_id }}" compute disks describe "$docker_disk" --zone "$instance_zone" &>/dev/null; then
-      gcloud --project "{{ gce_project_id }}" compute disks create "$docker_disk" --zone "$instance_zone" --size "{{ provision_gce_disk_size_node_docker }}" --type "pd-ssd"
+  if ! gcloud --project "{{ gce_project_id }}" compute instance-templates describe "{{ provision_prefix }}instance-template-{{ node_group.name }}" &>/dev/null; then
+    gcloud --project "{{ gce_project_id }}" compute instance-templates create "{{ provision_prefix }}instance-template-{{ node_group.name }}" \
+      --machine-type "{{ node_group.machine_type }}" --network "{{ gce_network_name }}" \
+      --tags "{{ provision_prefix }}ocp,ocp,{{ node_group.tags }}" \
+      --boot-disk-size "{{ node_group.boot_disk_size }}" --boot-disk-type "pd-ssd" \
+      --scopes "logging-write,monitoring-write,useraccounts-ro,service-control,service-management,storage-ro,compute-rw" \
+      --image "${image}" ${metadata}
 else
-      echo "Disk '${docker_disk}' already exists"
+    echo "Instance template '{{ provision_prefix }}instance-template-{{ node_group.name }}' already exists"
 fi
-    openshift_disk="${i}-openshift"
-    if ! gcloud --project "{{ gce_project_id }}" compute disks describe "$openshift_disk" --zone "$instance_zone" &>/dev/null; then
-      gcloud --project "{{ gce_project_id }}" compute disks create "$openshift_disk" --zone "$instance_zone" --size "{{ provision_gce_disk_size_node_openshift }}" --type "pd-ssd"
+
+  # Create instance group
+  if ! gcloud --project "{{ gce_project_id }}" compute instance-groups managed describe "{{ provision_prefix }}ig-{{ node_group.suffix }}" --zone "{{ gce_zone_name }}" &>/dev/null; then
+    gcloud --project "{{ gce_project_id }}" compute instance-groups managed create "{{ provision_prefix }}ig-{{ node_group.suffix }}" \
+      --zone "{{ gce_zone_name }}" --template "{{ provision_prefix }}instance-template-{{ node_group.name }}" --size "{{ node_group.scale }}"
 else
-      echo "Disk '${openshift_disk}' already exists"
+    echo "Instance group '{{ provision_prefix }}ig-{{ node_group.suffix }}' already exists"
 fi
-    try_attach_disk "${i}" "${docker_disk}" "${instance_zone}"
-    try_attach_disk "${i}" "${openshift_disk}" "${instance_zone}"
-  ) &
-done
+) &
+{% endfor %}
 for i in `jobs -p`; do wait $i; done
-
-# Master health check
+
+# Configure the master external LB rules
 (
+# Master health check
 if ! gcloud --project "{{ gce_project_id }}" compute health-checks describe "{{ provision_prefix }}master-ssl-lb-health-check" &>/dev/null; then
   gcloud --project "{{ gce_project_id }}" compute health-checks create https "{{ provision_prefix }}master-ssl-lb-health-check" --port "{{ internal_console_port }}" --request-path "/healthz"
 else
   echo "Health check '{{ provision_prefix }}master-ssl-lb-health-check' already exists"
 fi
+gcloud --project "{{ gce_project_id }}" compute instance-groups managed set-named-ports "{{ provision_prefix }}ig-m" \
+  --zone "{{ gce_zone_name }}" --named-ports "{{ provision_prefix }}port-name-master:{{ internal_console_port }}"
+
 # Master backend service
 if ! gcloud --project "{{ gce_project_id }}" compute backend-services describe "{{ provision_prefix }}master-ssl-lb-backend" --global &>/dev/null; then
-  gcloud --project "{{ gce_project_id }}" compute backend-services create "{{ provision_prefix }}master-ssl-lb-backend" --health-checks "{{ provision_prefix }}master-ssl-lb-health-check" --port-name "{{ provision_prefix }}-port-name-master" --protocol "SSL" --global --timeout="{{ provision_gce_master_https_timeout | default('2m') }}"
+  gcloud --project "{{ gce_project_id }}" compute backend-services create "{{ provision_prefix }}master-ssl-lb-backend" --health-checks "{{ provision_prefix }}master-ssl-lb-health-check" --port-name "{{ provision_prefix }}port-name-master" --protocol "TCP" --global --timeout="{{ provision_gce_master_https_timeout | default('2m') }}"
   gcloud --project "{{ gce_project_id }}" compute backend-services add-backend "{{ provision_prefix }}master-ssl-lb-backend" --instance-group "{{ provision_prefix }}ig-m" --global --instance-group-zone "{{ gce_zone_name }}"
 else
   echo "Backend service '{{ provision_prefix }}master-ssl-lb-backend' already exists"
 fi
-) &
-
-# Master Certificate
-( if ! gcloud --project "{{ gce_project_id }}" compute ssl-certificates describe "{{ provision_prefix }}master-ssl-lb-cert" &>/dev/null; then
-  if [ -z "{{ provision_master_https_key_file }}" ] || [ -z "{{ provision_master_https_cert_file }}" ]; then
-    KEY='/tmp/ocp-ssl.key'
-    CERT='/tmp/ocp-ssl.crt'
-    openssl req -x509 -nodes -days 3650 -newkey rsa:2048 -subj "/C=US/L=Raleigh/O={{ public_hosted_zone }}/CN={{ openshift_master_cluster_public_hostname }}" -keyout "$KEY" -out "$CERT"
-  else
-    KEY="{{ provision_master_https_key_file }}"
-    CERT="{{ provision_master_https_cert_file }}"
-  fi
-  gcloud --project "{{ gce_project_id }}" compute ssl-certificates create "{{ provision_prefix }}master-ssl-lb-cert" --private-key "$KEY" --certificate "$CERT"
-  if [ -z "{{ provision_master_https_key_file }}" ] || [ -z "{{ provision_master_https_cert_file }}" ]; then
-    rm -fv "$KEY" "$CERT"
-  fi
-else
-  echo "Certificate '{{ provision_prefix }}master-ssl-lb-cert' already exists"
-fi ) &
-
-for i in `jobs -p`; do wait $i; done
-(
-# Master ssl proxy target
-if ! gcloud --project "{{ gce_project_id }}" compute target-ssl-proxies describe "{{ provision_prefix }}master-ssl-lb-target" &>/dev/null; then
-  gcloud --project "{{ gce_project_id }}" compute target-ssl-proxies create "{{ provision_prefix }}master-ssl-lb-target" --backend-service "{{ provision_prefix }}master-ssl-lb-backend" --ssl-certificate "{{ provision_prefix }}master-ssl-lb-cert"
+# Master tcp proxy target
+if ! gcloud --project "{{ gce_project_id }}" compute target-tcp-proxies describe "{{ provision_prefix }}master-ssl-lb-target" &>/dev/null; then
+  gcloud --project "{{ gce_project_id }}" compute target-tcp-proxies create "{{ provision_prefix }}master-ssl-lb-target" --backend-service "{{ provision_prefix }}master-ssl-lb-backend"
 else
   echo "Proxy target '{{ provision_prefix }}master-ssl-lb-target' already exists"
 fi
@@ -256,12 +174,14 @@ fi
 # Master forwarding rule
 if ! gcloud --project "{{ gce_project_id }}" compute forwarding-rules describe "{{ provision_prefix }}master-ssl-lb-rule" --global &>/dev/null; then
   IP=$(gcloud --project "{{ gce_project_id }}" compute addresses describe "{{ provision_prefix }}master-ssl-lb-ip" --global --format='value(address)')
-  gcloud --project "{{ gce_project_id }}" compute forwarding-rules create "{{ provision_prefix }}master-ssl-lb-rule" --address "$IP" --global --ports "{{ console_port }}" --target-ssl-proxy "{{ provision_prefix }}master-ssl-lb-target"
+  gcloud --project "{{ gce_project_id }}" compute forwarding-rules create "{{ provision_prefix }}master-ssl-lb-rule" --address "$IP" --global --ports "{{ console_port }}" --target-tcp-proxy "{{ provision_prefix }}master-ssl-lb-target"
 else
   echo "Forwarding rule '{{ provision_prefix }}master-ssl-lb-rule' already exists"
 fi
 ) &
 
+
+# Configure the master internal LB rules
 (
 # Internal master health check
 if ! gcloud --project "{{ gce_project_id }}" compute http-health-checks describe "{{ provision_prefix }}master-network-lb-health-check" &>/dev/null; then
@@ -286,6 +206,8 @@ else
 fi
 ) &
 
+
+# Configure the infra node rules
 (
 # Router health check
 if ! gcloud --project "{{ gce_project_id }}" compute http-health-checks describe "{{ provision_prefix }}router-network-lb-health-check" &>/dev/null; then
@@ -313,13 +235,17 @@ fi
 for i in `jobs -p`; do wait $i; done
 
 # set the target pools
+(
 if [[ "ig-m" == "{{ provision_gce_router_network_instance_group }}" ]]; then
   gcloud --project "{{ gce_project_id }}" compute instance-groups managed set-target-pools "{{ provision_prefix }}ig-m" --target-pools "{{ provision_prefix }}master-network-lb-pool,{{ provision_prefix }}router-network-lb-pool" --zone "{{ gce_zone_name }}"
 else
   gcloud --project "{{ gce_project_id }}" compute instance-groups managed set-target-pools "{{ provision_prefix }}ig-m" --target-pools "{{ provision_prefix }}master-network-lb-pool" --zone "{{ gce_zone_name }}"
   gcloud --project "{{ gce_project_id }}" compute instance-groups managed set-target-pools "{{ provision_prefix }}{{ provision_gce_router_network_instance_group }}" --target-pools "{{ provision_prefix }}router-network-lb-pool" --zone "{{ gce_zone_name }}"
 fi
+) &
 
+# configure DNS
+(
 # Retry DNS changes until they succeed since this may be a shared resource
 while true; do
   dns="${TMPDIR:-/tmp}/dns.yaml"
@@ -371,27 +297,22 @@ while true; do
   fi
   break
 done
+) &
 
 # Create bucket for registry
-( if ! gsutil ls -p "{{ gce_project_id }}" "gs://{{ provision_gce_registry_gcs_bucket }}" &>/dev/null; then
-  gsutil mb -p "{{ gce_project_id }}" -l "{{ gce_region_name }}" "gs://{{ provision_gce_registry_gcs_bucket }}"
+(
+if ! gsutil ls -p "{{ gce_project_id }}" "gs://{{ openshift_hosted_registry_storage_gcs_bucket }}" &>/dev/null; then
+  gsutil mb -p "{{ gce_project_id }}" -l "{{ gce_region_name }}" "gs://{{ openshift_hosted_registry_storage_gcs_bucket }}"
 else
-  echo "Bucket '{{ provision_gce_registry_gcs_bucket }}' already exists"
-fi ) &
+  echo "Bucket '{{ openshift_hosted_registry_storage_gcs_bucket }}' already exists"
+fi
+) &
 
-for i in `jobs -p`; do wait $i; done
+# wait until all node groups are stable
+{% for node_group in provision_gce_node_groups %}
+# wait for stable {{ node_group.name }}
+( gcloud --project "{{ gce_project_id }}" compute instance-groups managed wait-until-stable "{{ provision_prefix }}ig-{{ node_group.suffix }}" --zone "{{ gce_zone_name }}" --timeout=300) &
+{% endfor %}
 
-# Wait for any remaining disks to be attached
-done=
-for i in `seq 1 60`; do
-  if [[ -z "$( gcloud --project "{{ gce_project_id }}" compute operations list --zones "{{ gce_zone_name }}" --filter 'operationType=attachDisk AND NOT status=DONE AND targetLink : "{{ provision_prefix }}ig-"' --page-size=10 --format 'value(targetLink)' --limit 1 )" ]]; then
-    done=1
-    break
-  fi
-  sleep 2
-done
-if [[ -z "${done}" ]]; then
-  echo "Failed to attach disks"
-  exit 1
-fi
\ No newline at end of file
+for i in `jobs -p`; do wait $i; done
diff --git a/playbooks/roles/validate-masters/tasks/main.yaml b/playbooks/roles/master-validate/tasks/main.yaml
similarity index 90%
rename from playbooks/roles/validate-masters/tasks/main.yaml
rename to playbooks/roles/master-validate/tasks/main.yaml
index d9163be..61e3b6c 100644
--- a/playbooks/roles/validate-masters/tasks/main.yaml
+++ b/playbooks/roles/master-validate/tasks/main.yaml
@@ -6,8 +6,8 @@
     method: GET
   register: resp
   until: resp.status == 200
-  retries: 3
-  delay: 10
+  retries: 6
+  delay: 5
 - name: Validate the internal address
   uri:
     url: "https://{{ openshift_master_cluster_hostname }}:{{ internal_console_port }}/healthz/ready"
@@ -15,8 +15,8 @@
     method: GET
   register: resp
   until: resp.status == 200
-  retries: 3
-  delay: 10
+  retries: 6
+  delay: 5
 - name: Validate the master address
   uri:
     url: "https://{{ inventory_hostname }}:{{ internal_console_port }}/healthz/ready"
@@ -24,5 +24,5 @@
     method: GET
   register: resp
   until: resp.status == 200
-  retries: 3
-  delay: 10
+  retries: 6
+  delay: 5
diff --git a/playbooks/roles/openshift-emptydir-quota/handlers/main.yaml b/playbooks/roles/node-emptydir-quota/handlers/main.yaml
similarity index 100%
rename from playbooks/roles/openshift-emptydir-quota/handlers/main.yaml
rename to playbooks/roles/node-emptydir-quota/handlers/main.yaml
diff --git a/playbooks/roles/openshift-emptydir-quota/tasks/main.yaml b/playbooks/roles/node-emptydir-quota/tasks/main.yaml
similarity index 69%
rename from playbooks/roles/openshift-emptydir-quota/tasks/main.yaml
rename to playbooks/roles/node-emptydir-quota/tasks/main.yaml
index 159ae2d..cacaec7 100644
--- a/playbooks/roles/openshift-emptydir-quota/tasks/main.yaml
+++ b/playbooks/roles/node-emptydir-quota/tasks/main.yaml
@@ -3,7 +3,7 @@
   replace:
     dest: /etc/origin/node/node-config.yaml
     regexp: '^(.*)perFSGroup: (\s+.*)?$'
-    replace: '\1 perFSGroup: 512Mi\2'
+    replace: '\1 perFSGroup: {{ provision_gce_emptydir_quota | default("2048Mi") }}\2'
     backup: yes
   notify:
   - restart atomic-openshift-node
diff --git a/playbooks/roles/openshift-registry/tasks/main.yaml b/playbooks/roles/openshift-registry/tasks/main.yaml
deleted file mode 100644
index a77eaa1..0000000
--- a/playbooks/roles/openshift-registry/tasks/main.yaml
+++ /dev/null
@@ -1,95 +0,0 @@
----
--- name: Switch to default project
-  command: oc project default
-
-- name: set the selector for the default namespace
-  command: oc annotate --overwrite namespace default openshift.io/node-selector=role=infra
-  ignore_errors: true
-
-- name: Check whether a registry exists or not
-  command: oadm registry --dry-run
-  register: registry_out
-  ignore_errors: true
-
-- name: Install registry
-  command: "oadm registry --selector='role=infra' --replicas=2 --config=/etc/origin/master/admin.kubeconfig --service-account=registry"
-  when: registry_out | failed
-  ignore_errors: true
-
-- name: Make sure registry deployment version is non-zero
-  shell: "oc get --no-headers dc/docker-registry | awk '{print $3}'"
-  register: deployer_waiter_out
-  until: '"0" not in deployer_waiter_out.stdout'
-  retries: 15
-  delay: 10
-
-- name: Determine registry deployment version
-  shell: "oc get --no-headers dc/docker-registry | awk '{print $2}'"
-  register: registry_version_out
-
-- name: Wait for registry to be running
-  shell: oc get pod | grep -v deploy | awk '/docker-registry-{{ registry_version_out.stdout }}/{ print $3 }' | head -1
-  register: deployer_output
-  until: deployer_output.stdout | search("Running")
-  retries: 30
-  delay: 30
-
-- name: Disable config change trigger on registry DC
-  command: oc patch dc/docker-registry -p '{"spec":{"triggers":[]}}'
-
-- name: Set up registry environment variable
-  command: oc env dc/docker-registry REGISTRY_CONFIGURATION_PATH=/etc/registryconfig/config.yml
-
-- name: Generate docker registry config
-  template: src="registry.j2" dest="/root/config.yml" owner=root mode=0600
-
-- name: Determine if new secrets are needed
-  command: oc get secrets
-  register: secrets
-
-- name: Place registry key onto host
-  copy:
-    src: "{{ playbook_dir + '/files/' + gcs_registry_keyfile }}"
-    dest: /root/gcs-registry.json
-    mode: 0600
-  when: "'dockerregistry' not in secrets.stdout"
-
-- name: Create registry secrets
-  command: oc secrets new dockerregistry /root/config.yml /root/gcs-registry.json
-  when: "'dockerregistry' not in secrets.stdout"
-
-
-- name: Determine if service account contains secrets
-  command: oc describe serviceaccount/registry
-  register: serviceaccount
-
-- name: Add secrets to registry service account
-  command: oc secrets add serviceaccount/registry secrets/dockerregistry
-  when: "'dockerregistry' not in serviceaccount.stdout"
-
-- name: Determine if deployment config contains secrets
-  command: oc volume dc/docker-registry --list
-  register: dc
-
-- name: Add volume to registry deployment config
-  command: oc volume dc/docker-registry --add --name=dockersecrets -m /etc/registryconfig --type=secret --secret-name=dockerregistry
-  when: "'dockersecrets' not in dc.stdout"
-
-- name: Deploy latest configuration of registry DC
-  command: oc deploy docker-registry --latest
-  register: deploy_latest
-
-- name: Re-enable config trigger on docker-registry
-  command: oc patch dc/docker-registry -p '{"spec":{"triggers":[{"type":"ConfigChange"}]}}'
-  when: deploy_latest | success
-
-- name: Determine registry deployment version
-  shell: "oc get --no-headers dc/docker-registry | awk '{print $2}'"
-  register: registry_version2_out
-
-- name: Wait for registry to be running
-  shell: oc get pod | grep -v deploy | awk '/docker-registry-{{ registry_version2_out.stdout }}/{ print $3 }' | head -1
-  register: deployer_output
-  until: deployer_output.stdout | search("Running")
-  retries: 30
-  delay: 30
diff --git a/playbooks/roles/openshift-registry/templates/registry.j2 b/playbooks/roles/openshift-registry/templates/registry.j2
deleted file mode 100644
index 7f9ca9b..0000000
--- a/playbooks/roles/openshift-registry/templates/registry.j2
+++ /dev/null
@@ -1,18 +0,0 @@
-version: 0.1
-log:
-  level: debug
-http:
-  addr: :5000
-storage:
-  cache:
-    layerinfo: inmemory
-  gcs:
-    bucket: "{{ provision_gce_registry_gcs_bucket }}"
-    rootdirectory: /registry
-    keyfile: /etc/registryconfig/gcs-registry.json
-auth:
-  openshift:
-    realm: openshift
-middleware:
-  repository:
-    - name: openshift
diff --git a/playbooks/roles/validate-app/tasks/main.yaml b/playbooks/roles/validate-app/tasks/main.yaml
deleted file mode 100644
index afbebcb..0000000
--- a/playbooks/roles/validate-app/tasks/main.yaml
+++ /dev/null
@@ -1,39 +0,0 @@
----
--- name: Gather facts
-  openshift_facts:
-    role: common
-
-- name: Create the validation project
-  command: "{{ openshift.common.client_binary }} new-project validate"
-
-- name: Create Hello world app
-  shell: "{{ openshift.common.client_binary }} new-app --template cakephp-example-mysql"
-
-- name: Wait for build to complete
-  shell: "{{ openshift.common.client_binary }} get pod | grep -v deploy | awk '/cakephp-example-mysql-1-build/{ print $3 }'"
-  register: build_output
-  until: build_output.stdout | search("Completed")
-  retries: 30
-  delay: 15
-
-- name: Wait for App to be running
-  shell: "{{ openshift.common.client_binary }} get pod | grep -v deploy | grep -v build | awk '/cakephp-example-mysql-1-*/{print $3}'"
-  register: deployer_output
-  until: deployer_output.stdout | search("Running")
-  retries: 30
-  delay: 15
-
-- name: Sleep to allow for route propegation
-  pause:
-    seconds: 10
-- name: check the status of the page
-  uri:
-    url: "http://cakephp-example-validate.{{ wildcard_zone }}"
-    method: GET
-  register: resp
-  until: resp.status == 200
-  retries: 3
-  delay: 10
-
-- name: Delete the Project
-  command: "{{ openshift.common.client_binary }} delete project validate"
diff --git a/playbooks/roles/validate-etcd/tasks/main.yaml b/playbooks/roles/validate-etcd/tasks/main.yaml
deleted file mode 100644
index 53c187e..0000000
--- a/playbooks/roles/validate-etcd/tasks/main.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
----
--- name: Validate etcd
-  command: "etcdctl -C https://{{ ansible_fqdn }}:2379 --ca-file=/etc/origin/master/master.etcd-ca.crt --cert-file=/etc/origin/master/master.etcd-client.crt --key-file=/etc/origin/master/master.etcd-client.key cluster-health | grep 'cluster is'"
-  register: etcd_health
-
-- name: ETCD Cluster is healthy
-  debug:
-    msg: "Cluster is healthy"
-  when: etcd_health.stdout.find('cluster is healthy') != -1
-
-- name: ETCD Cluster is NOT healthy
-  debug:
-    msg: "Cluster is NOT healthy"
-  when: etcd_health.stdout.find('cluster is healthy') == -1
diff --git a/playbooks/roles/validate-public/tasks/main.yaml b/playbooks/roles/validate-public/tasks/main.yaml
deleted file mode 100644
index be3e98f..0000000
--- a/playbooks/roles/validate-public/tasks/main.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
----
--- name: Validate the public address
-  uri:
-    url: "https://{{ openshift_master_cluster_public_hostname }}:{{ console_port }}/healthz/ready"
-    validate_certs: False
-    method: GET
-  register: resp
-  until: resp.status == 200
-  retries: 3
-  delay: 10
diff --git a/playbooks/terminate.yaml b/playbooks/terminate.yaml
index 38fc4fb..fdb00d6 100644
--- a/playbooks/terminate.yaml
+++ b/playbooks/terminate.yaml
@@ -4,4 +4,4 @@
   hosts: localhost
   connection: local
   roles:
-  - deprovision
+  - gce-provision-remove
diff --git a/playbooks/update.yaml b/playbooks/update.yaml
index 1667ccd..a5bc9b8 100644
--- a/playbooks/update.yaml
+++ b/playbooks/update.yaml
@@ -3,12 +3,12 @@
 - hosts: localhost
   connection: local
   roles:
-  - instance-groups
+  - gce-instance-groups
 
 - hosts: cluster_hosts
   roles:
-  - cluster-variables
+  - gce-cluster-variables
 
 # TODO: make this parameterizeable? That would make this a dynamic playbook, which has consequences for
 # syntax checking.
-- include: /usr/share/ansible/openshift-ansible/playbooks/byo/openshift-cluster/upgrades/v3_6/upgrade.yml
+- include: /usr/share/ansible/openshift-ansible/playbooks/byo/openshift-cluster/upgrades/v3_7/upgrade.yml