From 7dd46d6c04e6cb216a50678173d367cc791e7a50 Mon Sep 17 00:00:00 2001
From: Aaron Crickenberger
Date: Thu, 7 May 2020 16:29:28 -0700
Subject: [PATCH 1/7] add support for nodepool labels and taints

---
Review note: the taint entry in ignore_changes is written as
node_config[0].taint. node_config is a single nested block, which Terraform
exposes as a one-element list, so the attribute has to be reached through
index 0; a string key on that list does not address it.

 .../gcp/clusters/modules/gke-nodepool/main.tf |  6 ++++++
 .../modules/gke-nodepool/variables.tf         | 21 +++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/infra/gcp/clusters/modules/gke-nodepool/main.tf b/infra/gcp/clusters/modules/gke-nodepool/main.tf
index 4e1466c9ca8..b008951a707 100644
--- a/infra/gcp/clusters/modules/gke-nodepool/main.tf
+++ b/infra/gcp/clusters/modules/gke-nodepool/main.tf
@@ -44,6 +44,8 @@ resource "google_container_node_pool" "node_pool" {
     machine_type = var.machine_type
     disk_size_gb = var.disk_size_gb
     disk_type    = var.disk_type
+    labels       = var.labels
+    taint        = var.taints
 
     service_account = var.service_account
     oauth_scopes    = ["https://www.googleapis.com/auth/cloud-platform"]
@@ -61,5 +63,9 @@ resource "google_container_node_pool" "node_pool" {
   // the old one
   lifecycle {
     create_before_destroy = true
+    # https://www.terraform.io/docs/providers/google/r/container_cluster.html#taint
+    ignore_changes = [
+      node_config[0].taint,
+    ]
   }
 }
diff --git a/infra/gcp/clusters/modules/gke-nodepool/variables.tf b/infra/gcp/clusters/modules/gke-nodepool/variables.tf
index 7f16d67971c..ec60da96f32 100644
--- a/infra/gcp/clusters/modules/gke-nodepool/variables.tf
+++ b/infra/gcp/clusters/modules/gke-nodepool/variables.tf
@@ -64,6 +64,27 @@ variable "disk_type" {
   type        = string
 }
 
+variable "labels" {
+  description = "The labels to apply to this node_pool"
+  type        = map(string)
+  default     = {}
+}
+
+# Terraform docs suggest not using terraform to manage taints, because GKE is going
+# to auto-apply taints if certain features are enabled, and terraform doesn't do well
+# when something else is managing the same thing it's managing.
+#
+# So this is mostly here to describe intent. It will assign taints at creation time,
+# but cannot be used to ensure the taints remain applied throughout the node pool's
+# lifecycle
+#
+# ref: https://www.terraform.io/docs/providers/google/r/container_cluster.html#taint
+variable "taints" {
+  description = "The taints to apply to this node_pool upon creation (NOTE: changes will be ignored throughout lifecycle)"
+  type        = list(object({ key = string, value = string, effect = string }))
+  default     = []
+}
+
 variable "service_account" {
   description = "The email address of the GCP Service Account to be associated with nodes in this node_pool"
   type        = string

From 98facc2008478f7d1e73425eafd3901d73a720be Mon Sep 17 00:00:00 2001
From: Aaron Crickenberger
Date: Thu, 7 May 2020 19:39:20 -0700
Subject: [PATCH 2/7] add a greenhouse nodepool

---
 .../k8s-infra-prow-build/prow-build/main.tf   | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/main.tf b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/main.tf
index 45a07c1a8b2..03a8ca5958f 100644
--- a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/main.tf
+++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/main.tf
@@ -110,3 +110,22 @@ module "prow_build_nodepool" {
   disk_type       = "pd-ssd"
   service_account = module.prow_build_cluster.cluster_node_sa.email
 }
+
+module "greenhouse_nodepool" {
+  source       = "../../../modules/gke-nodepool"
+  project_name = local.project_id
+  cluster_name = module.prow_build_cluster.cluster.name
+  location     = module.prow_build_cluster.cluster.location
+  name         = "greenhouse"
+  labels       = { dedicated = "greenhouse" }
+  # NOTE: taints are only applied during creation and ignored after that, see module docs
+  taints     = [{ key = "dedicated", value = "greenhouse", effect = "NO_SCHEDULE" }]
+  min_count  = 1
+  max_count  = 1
+  image_type = "COS"
+  # choosing a machine type to maximize IOPs
+  machine_type    = "n1-standard-32"
+  disk_size_gb    = 100
+  disk_type       = "pd-standard"
+  service_account = module.prow_build_cluster.cluster_node_sa.email
+}

From 9d50770d4b73823e08b038aaaf62b68360fb8de1 Mon Sep 17 00:00:00 2001
From: Aaron Crickenberger
Date: Thu, 7 May 2020 19:52:51 -0700
Subject: [PATCH 3/7] add greenhouse resources copied from k/test-infra/greenhouse and renamed

---
NOTE(review): the Deployment pulls gcr.io/k8s-testimages/greenhouse:latest
with imagePullPolicy: Always, so the running version can change on any pod
restart. Consider pinning to a specific tag or digest in a follow-up.

 .../resources/greenhouse-deployment.yaml      | 54 +++++++++++++++++++
 .../resources/greenhouse-metrics-service.yaml | 15 ++++++
 .../resources/greenhouse-service.yaml         | 26 +++++++++
 .../resources/greenhouse-storage.yaml         | 30 +++++++++++
 4 files changed, 125 insertions(+)
 create mode 100644 infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-deployment.yaml
 create mode 100644 infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-metrics-service.yaml
 create mode 100644 infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-service.yaml
 create mode 100644 infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-storage.yaml

diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-deployment.yaml b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-deployment.yaml
new file mode 100644
index 00000000000..bbddec5467a
--- /dev/null
+++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-deployment.yaml
@@ -0,0 +1,54 @@
+# Copyright 2018 The Kubernetes Authors All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+  name: greenhouse
+  labels:
+    app: greenhouse
+spec:
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: greenhouse
+    spec:
+      containers:
+      - name: greenhouse
+        image: gcr.io/k8s-testimages/greenhouse:latest
+        imagePullPolicy: Always
+        ports:
+        - name: cache
+          containerPort: 8080
+        - name: metrics
+          containerPort: 9090
+        args:
+        - --dir=/data
+        - --min-percent-blocks-free=2
+        volumeMounts:
+        - name: cache
+          mountPath: /data
+      volumes:
+      - name: cache
+        persistentVolumeClaim:
+          claimName: greenhouse
+      # run on our dedicated node
+      tolerations:
+      - key: "dedicated"
+        operator: "Equal"
+        value: "greenhouse"
+        effect: "NoSchedule"
+      nodeSelector:
+        dedicated: "greenhouse"
diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-metrics-service.yaml b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-metrics-service.yaml
new file mode 100644
index 00000000000..78ba9b76493
--- /dev/null
+++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-metrics-service.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: bazel-cache-metrics
+  namespace: default
+spec:
+  selector:
+    app: greenhouse
+  ports:
+  - name: default
+    protocol: TCP
+    port: 80
+    targetPort: 9090
+  loadBalancerIP: 35.225.115.154
+  type: LoadBalancer
diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-service.yaml b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-service.yaml
new file mode 100644
index 00000000000..38042bbea50
--- /dev/null
+++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-service.yaml
@@ -0,0 +1,26 @@
+# Copyright 2018 The Kubernetes Authors All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: bazel-cache
+  labels:
+    run: bazel-cache
+spec:
+  ports:
+  - port: 8080
+    protocol: TCP
+  selector:
+    app: greenhouse
diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-storage.yaml b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-storage.yaml
new file mode 100644
index 00000000000..705fd780986
--- /dev/null
+++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-storage.yaml
@@ -0,0 +1,30 @@
+# storage class used by greenhouse for GKE / GCE we use persistent SSD
+# previously we also used local SSDs via hostPath which are *great* but
+# "only" ~375 GB
+# https://cloud.google.com/compute/docs/disks/
+kind: StorageClass
+apiVersion: storage.k8s.io/v1
+metadata:
+  name: greenhouse
+provisioner: kubernetes.io/gce-pd
+parameters:
+  type: pd-ssd
+# we want to use a volume with strictatime,lazytime (and not noatime or relatime)
+# so that file access times *are* recorded but are lazily flushed to the disk
+# https://lwn.net/Articles/621046/
+# https://unix.stackexchange.com/questions/276858/why-is-ext4-filesystem-mounted-with-both-relatime-and-lazytime
+mountOptions: ["strictatime", "lazytime"]
+---
+# 3TB of SSD :-)
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  name: greenhouse
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 3000Gi
+  storageClassName: greenhouse
+---

From c677befc245cd592355e1163eac94f85359bdf78 Mon Sep 17 00:00:00 2001
From: Aaron Crickenberger
Date: Mon, 18 May 2020 15:30:26 -0700
Subject: [PATCH 4/7] greenhouse: presets assume deployment in default

---
 .../prow-build/resources/greenhouse-deployment.yaml              | 1 +
 .../prow-build/resources/greenhouse-service.yaml                 | 1 +
 .../prow-build/resources/greenhouse-storage.yaml                 | 1 +
 3 files changed, 3 insertions(+)

diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-deployment.yaml b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-deployment.yaml
index bbddec5467a..40702124b49 100644
--- a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-deployment.yaml
+++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-deployment.yaml
@@ -16,6 +16,7 @@ apiVersion: extensions/v1beta1
 kind: Deployment
 metadata:
   name: greenhouse
+  namespace: default
   labels:
     app: greenhouse
 spec:
diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-service.yaml b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-service.yaml
index 38042bbea50..4b1d9455d70 100644
--- a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-service.yaml
+++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-service.yaml
@@ -16,6 +16,7 @@ apiVersion: v1
 kind: Service
 metadata:
   name: bazel-cache
+  namespace: default
   labels:
     run: bazel-cache
 spec:
diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-storage.yaml b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-storage.yaml
index 705fd780986..18bab407b93 100644
--- a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-storage.yaml
+++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-storage.yaml
@@ -20,6 +20,7 @@ kind: PersistentVolumeClaim
 apiVersion: v1
 metadata:
   name: greenhouse
+  namespace: default
 spec:
   accessModes:
     - ReadWriteOnce

From a2cc717798bd504d75164ae05eabf64e9e4d63db Mon Sep 17 00:00:00 2001
From: Aaron Crickenberger
Date: Wed, 20 May 2020 14:01:39 -0700
Subject: [PATCH 5/7] switch greenhouse pool image to match build pool

---
 .../clusters/projects/k8s-infra-prow-build/prow-build/main.tf  | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/main.tf b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/main.tf
index 03a8ca5958f..16cd663a309 100644
--- a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/main.tf
+++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/main.tf
@@ -122,7 +122,8 @@ module "greenhouse_nodepool" {
   taints     = [{ key = "dedicated", value = "greenhouse", effect = "NO_SCHEDULE" }]
   min_count  = 1
   max_count  = 1
-  image_type = "COS"
+  # choosing this image for parity with the build nodepool
+  image_type = "UBUNTU_CONTAINERD"
   # choosing a machine type to maximize IOPs
   machine_type    = "n1-standard-32"
   disk_size_gb    = 100

From f9aabb79532627de612f92c4441d500f14f7162d Mon Sep 17 00:00:00 2001
From: Aaron Crickenberger
Date: Wed, 20 May 2020 15:24:03 -0700
Subject: [PATCH 6/7] allocate ip for greenhouse-metrics service

---
 .../prow-build/resources/greenhouse-metrics-service.yaml | 2 +-
 infra/gcp/ensure-e2e-projects.sh                          | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-metrics-service.yaml b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-metrics-service.yaml
index 78ba9b76493..61e900249e8 100644
--- a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-metrics-service.yaml
+++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-metrics-service.yaml
@@ -11,5 +11,5 @@ spec:
     protocol: TCP
     port: 80
     targetPort: 9090
-  loadBalancerIP: 35.225.115.154
+  loadBalancerIP: 34.72.140.202
   type: LoadBalancer
diff --git a/infra/gcp/ensure-e2e-projects.sh b/infra/gcp/ensure-e2e-projects.sh
index e54397ea0f0..398e79fba9a 100755
--- a/infra/gcp/ensure-e2e-projects.sh
+++ b/infra/gcp/ensure-e2e-projects.sh
@@ -48,6 +48,15 @@ ensure_regional_address \
     "to allow monitoring.k8s.prow.io to scrape boskos metrics"
 ) 2>&1 | indent
 
+color 6 "Ensuring greenhouse is empowered"
+(
+ensure_regional_address \
+    "k8s-infra-prow-build" \
+    "us-central1" \
+    "greenhouse-metrics" \
+    "to allow monitoring.k8s.prow.io to scrape greenhouse metrics"
+) 2>&1 | indent
+
 ## setup projects to be used by e2e tests for standing up clusters
 
 E2E_PROJECTS=(

From 2cd3f71fd7e0985b5df1caba48eada49ad901448 Mon Sep 17 00:00:00 2001
From: Aaron Crickenberger
Date: Wed, 27 May 2020 11:46:05 -0700
Subject: [PATCH 7/7] migrate greenhouse deployment to apps/v1

---
 .../prow-build/resources/greenhouse-deployment.yaml          | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-deployment.yaml b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-deployment.yaml
index 40702124b49..6695b1a215e 100644
--- a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-deployment.yaml
+++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/resources/greenhouse-deployment.yaml
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-apiVersion: extensions/v1beta1
+apiVersion: apps/v1
 kind: Deployment
 metadata:
   name: greenhouse
@@ -21,6 +21,9 @@ metadata:
     app: greenhouse
 spec:
   replicas: 1
+  selector:
+    matchLabels:
+      app: greenhouse
   template:
     metadata:
       labels: