Skip to content

Commit

Permalink
Use gomplate to template patches/hpa.yaml. (#293)
Browse files Browse the repository at this point in the history
* use `gomplate` for templating `hpa.yaml`.

* update the `maxReplicas` example in the docs (`docs/source/CUSTOM-JOB.rst`) to use the new template syntax as well.

* remove unused `GPU_MAX_TIMES_X` and `GPU_MAX_DIVIDED_BY_X` env vars.
  • Loading branch information
willgraf authored Mar 17, 2020
1 parent a931d46 commit 21f8867
Show file tree
Hide file tree
Showing 5 changed files with 13 additions and 71 deletions.
36 changes: 2 additions & 34 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
if: ( (type = pull_request AND branch = master) OR (commit_message =~ /\[build-integration-tests\]/) )
env:
- CHARTS_PATH=/home/travis/build/vanvalenlab/kiosk/conf/charts
- CLOUD_PROVIDER=gke
- CLOUD_PROVIDER=gke
- CLOUDSDK_BUCKET=deepcell-output-benchmarking
- CLOUDSDK_COMPUTE_REGION=us-west1
- CLOUDSDK_CONFIG=/home/travis/build/vanvalenlab/kiosk/.config/gcloud/
Expand All @@ -34,22 +34,6 @@ jobs:
- GKE_MACHINE_TYPE=n1-standard-1
- GCP_SERVICE_ACCOUNT=continuous-integration-test@deepcell-209717.iam.gserviceaccount.com
- GPU_MACHINE_TYPE=n1-highmem-2
- GPU_MAX_DIVIDED_BY_FOUR=1
- GPU_MAX_DIVIDED_BY_THREE=1
- GPU_MAX_DIVIDED_BY_TWO=2
- GPU_MAX_TIMES_FIFTY=200
- GPU_MAX_TIMES_FIVE=20
- GPU_MAX_TIMES_FOUR=16
- GPU_MAX_TIMES_FOURTY=160
- GPU_MAX_TIMES_ONE_HUNDRED=400
- GPU_MAX_TIMES_ONE_HUNDRED_FIFTY=600
- GPU_MAX_TIMES_SEVENTY_FIVE=300
- GPU_MAX_TIMES_TEN=40
- GPU_MAX_TIMES_THIRTY=120
- GPU_MAX_TIMES_THREE=12
- GPU_MAX_TIMES_TWENTY=80
- GPU_MAX_TIMES_TWO=8
- GPU_MAX_TIMES_TWO_HUNDRED=800
- GPU_NODE_MIN_SIZE=0
- GPU_NODE_MAX_SIZE=4
- GPU_PER_NODE=1
Expand All @@ -68,7 +52,7 @@ jobs:
if: ( (type = pull_request AND branch = master) OR (commit_message =~ /\[build-integration-tests\]/) ) AND (commit_message =~ /\[test-elk\]/)
env:
- CHARTS_PATH=/home/travis/build/vanvalenlab/kiosk/conf/charts
- CLOUD_PROVIDER=gke
- CLOUD_PROVIDER=gke
- CLOUDSDK_BUCKET=deepcell-output-benchmarking
- CLOUDSDK_COMPUTE_REGION=us-west1
- CLOUDSDK_CONFIG=/home/travis/build/vanvalenlab/kiosk/.config/gcloud/
Expand All @@ -81,22 +65,6 @@ jobs:
- GKE_MACHINE_TYPE=n1-standard-1
- GCP_SERVICE_ACCOUNT=continuous-integration-test@deepcell-209717.iam.gserviceaccount.com
- GPU_MACHINE_TYPE=n1-highmem-2
- GPU_MAX_DIVIDED_BY_FOUR=1
- GPU_MAX_DIVIDED_BY_THREE=1
- GPU_MAX_DIVIDED_BY_TWO=2
- GPU_MAX_TIMES_FIFTY=200
- GPU_MAX_TIMES_FIVE=20
- GPU_MAX_TIMES_FOUR=16
- GPU_MAX_TIMES_FOURTY=160
- GPU_MAX_TIMES_ONE_HUNDRED=400
- GPU_MAX_TIMES_ONE_HUNDRED_FIFTY=600
- GPU_MAX_TIMES_SEVENTY_FIVE=300
- GPU_MAX_TIMES_TEN=40
- GPU_MAX_TIMES_THIRTY=120
- GPU_MAX_TIMES_THREE=12
- GPU_MAX_TIMES_TWENTY=80
- GPU_MAX_TIMES_TWO=8
- GPU_MAX_TIMES_TWO_HUNDRED=800
- GPU_NODE_MIN_SIZE=0
- GPU_NODE_MAX_SIZE=4
- GPU_PER_NODE=1
Expand Down
13 changes: 7 additions & 6 deletions conf/patches/hpa.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{ $max_gpus := conv.ToInt (getenv "GPU_NODE_MAX_SIZE" | default 1) }}
---
apiVersion: autoscaling/v2beta1
kind: HorizontalPodAutoscaler
Expand All @@ -10,7 +11,7 @@ spec:
kind: Deployment
name: tf-serving
minReplicas: 1
maxReplicas: $GPU_NODE_MAX_SIZE
maxReplicas: {{ $max_gpus }}
metrics:
- type: Object
object:
Expand All @@ -32,7 +33,7 @@ spec:
# kind: Deployment
# name: data-processing
# minReplicas: 1
# maxReplicas: $GPU_MAX_TIMES_TWENTY
# maxReplicas: {{ mul $max_gpus 20 }}
# metrics:
# - type: Resource
# resource:
Expand All @@ -50,7 +51,7 @@ spec:
kind: Deployment
name: frontend
minReplicas: 1
maxReplicas: $GPU_MAX_TIMES_TEN
maxReplicas: {{ mul $max_gpus 10 }}
metrics:
- type: Resource
resource:
Expand All @@ -68,7 +69,7 @@ spec:
kind: Deployment
name: segmentation-consumer
minReplicas: 1
maxReplicas: $GPU_MAX_TIMES_ONE_HUNDRED_FIFTY
maxReplicas: {{ mul $max_gpus 150 }}
metrics:
- type: Object
object:
Expand All @@ -90,7 +91,7 @@ spec:
kind: Deployment
name: zip-consumer
minReplicas: 1
maxReplicas: $GPU_MAX_TIMES_ONE_HUNDRED
maxReplicas: {{ mul $max_gpus 100 }}
metrics:
- type: Object
object:
Expand All @@ -112,7 +113,7 @@ spec:
kind: Deployment
name: tracking-consumer
minReplicas: 1
maxReplicas: $GPU_MAX_TIMES_FIFTY
maxReplicas: {{ mul $max_gpus 50 }}
metrics:
- type: Object
object:
Expand Down
3 changes: 1 addition & 2 deletions conf/tasks/Makefile.kubectl
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,5 @@ kubectl/destroy/prometheus/operator:

## Create horizontal pod autoscalers for all relevant deployments
kubectl/implement/autoscaling:
@envsubst < patches/hpa.yaml > patches/hpa_subbed.yaml
@kubectl apply -f patches/hpa_subbed.yaml
gomplate -f patches/hpa.yaml | kubectl apply -f -
@kubens deepcell
2 changes: 1 addition & 1 deletion docs/source/CUSTOM-JOB.rst
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ To effectively scale your new consumer, some small edits will be needed in the f
kind: Deployment
name: tracking-consumer
minReplicas: 1
maxReplicas: $GPU_MAX_TIMES_FIFTY
maxReplicas: {{ mul $max_gpus 50 }}
metrics:
- type: Object
object:
Expand Down
30 changes: 2 additions & 28 deletions scripts/menu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -136,28 +136,6 @@ function tailcmd() {
--tailbox "${tmpfile}" $((LINES-5)) $((COLUMNS-3))
}

function export_gpu_constants() {
  # Export derivative GPU-related variables for use in autoscaling.
  #
  # Globals:
  #   GPU_NODE_MAX_SIZE (read)       - maximum number of GPU nodes.
  #   GPU_MAX_TIMES_<N> (exported)   - GPU_NODE_MAX_SIZE multiplied by N.
  #   GPU_MAX_DIVIDED_BY_<N> (exported) - GPU_NODE_MAX_SIZE divided by N
  #                                       (integer division, rounds down).
  # Returns:
  #   0 when the constants were exported, or when GPU_NODE_MAX_SIZE is
  #   unset/empty (nothing is exported in that case);
  #   1 when GPU_NODE_MAX_SIZE is not a non-negative integer.
  local max_gpus="${GPU_NODE_MAX_SIZE:-}"

  # Nothing to derive until the maximum GPU count has been configured.
  if [[ -z "${max_gpus}" ]]; then
    return 0
  fi

  # Refuse anything that is not plain digits: splicing arbitrary text into
  # an arithmetic expansion either fails with a cryptic error or silently
  # evaluates to an unintended number.
  if [[ ! "${max_gpus}" =~ ^[0-9]+$ ]]; then
    printf 'export_gpu_constants: GPU_NODE_MAX_SIZE must be a non-negative integer, got "%s"\n' \
      "${max_gpus}" >&2
    return 1
  fi

  # Force base 10 so zero-padded input such as "08" is not parsed as octal.
  max_gpus=$((10#${max_gpus}))

  # NAME:factor pairs; the NAME spellings (including FOURTY) are part of the
  # exported interface and must not be changed.
  local -a multiples=(
    TWO:2 THREE:3 FOUR:4 FIVE:5 TEN:10 TWENTY:20 THIRTY:30 FOURTY:40
    FIFTY:50 SEVENTY_FIVE:75 ONE_HUNDRED:100 ONE_HUNDRED_FIFTY:150
    TWO_HUNDRED:200
  )
  local -a divisors=(TWO:2 THREE:3 FOUR:4)

  local pair
  for pair in "${multiples[@]}"; do
    export "GPU_MAX_TIMES_${pair%%:*}=$((max_gpus * ${pair##*:}))"
  done
  for pair in "${divisors[@]}"; do
    export "GPU_MAX_DIVIDED_BY_${pair%%:*}=$((max_gpus / ${pair##*:}))"
  done
}

function menu() {
# Show different functions in the main menu depending on whether the
# cluster has been created yet.
Expand Down Expand Up @@ -264,8 +242,7 @@ function configure_aws() {
-e AWS_S3_BUCKET \
-e NAMESPACE \
-e GPU_NODE_MIN_SIZE \
-e GPU_NODE_MAX_SIZE \
-e GPU_MAX > ${GEODESIC_CONFIG_HOME}/preferences
-e GPU_NODE_MAX_SIZE > ${GEODESIC_CONFIG_HOME}/preferences
}

function configure_gke() {
Expand Down Expand Up @@ -509,8 +486,6 @@ function configure_gke() {
# The type of node for the consumer node pools
export CONSUMER_MACHINE_TYPE=${CONSUMER_MACHINE_TYPE:-n1-highmem-2}

export_gpu_constants

printenv | grep -e CLOUD_PROVIDER \
-e CLOUDSDK \
-e NODE_MIN_SIZE \
Expand All @@ -523,8 +498,7 @@ function configure_gke() {
-e GCP_TRAINING_GPU_TYPE \
-e GPU_PER_NODE \
-e GPU_NODE_MIN_SIZE \
-e GPU_NODE_MAX_SIZE \
-e GPU_MAX > ${GEODESIC_CONFIG_HOME}/preferences
-e GPU_NODE_MAX_SIZE > ${GEODESIC_CONFIG_HOME}/preferences
}

function shell() {
Expand Down

0 comments on commit 21f8867

Please sign in to comment.