From c14c299a8d48d89ebc9efe2e3b83c5ce16db3341 Mon Sep 17 00:00:00 2001 From: roberta-dt Date: Tue, 15 Aug 2023 15:52:25 +0100 Subject: [PATCH 01/11] refactor(Makefile): simplify make commands --- Makefile | 123 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 62 insertions(+), 61 deletions(-) diff --git a/Makefile b/Makefile index 13020ced..6c20a715 100644 --- a/Makefile +++ b/Makefile @@ -23,77 +23,78 @@ pre-commit: ## Runs the pre-commit checks over entire repo cd pipelines && \ poetry run pre-commit run --all-files -setup: ## Set up local environment for Python development on pipelines - @cd pipelines && \ - poetry install --with dev +env ?= dev +deploy: ## Deploy the Terraform infrastructure to your project. Requires VERTEX_PROJECT_ID and VERTEX_LOCATION env variables to be set in env.sh. Optionally specify env= (default = dev) + @ cd terraform/envs/$(env) && \ + terraform init -backend-config='bucket=${VERTEX_PROJECT_ID}-tfstate' && \ + terraform apply -var 'project_id=${VERTEX_PROJECT_ID}' -var 'region=${VERTEX_LOCATION}' + +undeploy: ## DESTROY the Terraform infrastructure in your project. Requires VERTEX_PROJECT_ID and VERTEX_LOCATION env variables to be set in env.sh. Optionally specify env= (default = dev) + @ cd terraform/envs/$(env) && \ + terraform init -backend-config='bucket=${VERTEX_PROJECT_ID}-tfstate' && \ + terraform destroy -var 'project_id=${VERTEX_PROJECT_ID}' -var 'region=${VERTEX_LOCATION}' -test-trigger: ## Runs unit tests for the pipeline trigger code +install: ## Set up local environment for Python development on pipelines @cd pipelines && \ - poetry run python -m pytest tests/trigger + poetry install --with dev && \ + cd .. && \ + for component_group in components/*/ ; do \ + echo "Setup for $$component_group" && \ + cd "$$component_group" && \ + poetry install --with dev && \ + cd ../.. ;\ + done ; \ -compile-pipeline: ## Compile the pipeline to pipeline.yaml. Must specify pipeline= + +compile: ## Compile the pipeline to pipeline.yaml. Must specify pipeline= @cd pipelines/src && \ poetry run kfp dsl compile --py pipelines/${pipeline}/pipeline.py --output pipelines/${pipeline}/pipeline.yaml --function pipeline -setup-components: ## Run unit tests for a component group - @cd "components/${GROUP}" && \ - poetry install --with dev - -setup-all-components: ## Run unit tests for all pipeline components - @set -e && \ - for component_group in components/*/ ; do \ - echo "Setup components under $$component_group" && \ - $(MAKE) setup-components GROUP=$$(basename $$component_group) ; \ - done - -test-components: ## Run unit tests for a component group - @cd "components/${GROUP}" && \ - poetry run pytest - -test-all-components: ## Run unit tests for all pipeline components - @set -e && \ - for component_group in components/*/ ; do \ - echo "Test components under $$component_group" && \ - $(MAKE) test-components GROUP=$$(basename $$component_group) ; \ - done +targets ?= training serving +build: ## Build and push training/serving container image using Docker. Specify target= + @cd model && \ + for target in $$targets ; do \ + echo "Building $$target image" && \ + gcloud builds submit . \ + --region=${VERTEX_LOCATION} \ + --project=${VERTEX_PROJECT_ID} \ + --gcs-source-staging-dir=gs://${VERTEX_PROJECT_ID}-staging/source \ + --substitutions=_DOCKER_TARGET=${target},_DESTINATION_IMAGE_URI=${CONTAINER_IMAGE_REGISTRY}/${target}:${RESOURCE_SUFFIX} ; \ + done + + +compile ?=true +build ?= true +run: ## Compile pipeline and run pipeline in sandbox environment. Must specify pipeline=. Optionally specify enable_pipeline_caching= (defaults to default Vertex caching behaviour) + @if [ "${compile}" ]; then \ + $(MAKE) compile ; \ + fi && \ + if [ "${build}" ]; then \ + $(MAKE) build ; \ + fi && \ + cd pipelines/src \ + poetry run python -m pipelines.trigger --template_path=pipelines/${pipeline}/pipeline.yaml --enable_caching=$(enable_pipeline_caching) -test-components-coverage: ## Run tests with coverage - @cd "components/${GROUP}" && \ - poetry run coverage run -m pytest && \ - poetry run coverage report -m -test-all-components-coverage: ## Run tests with coverage - @set -e && \ - for component_group in components/*/ ; do \ - echo "Test components under $$component_group" && \ - $(MAKE) test-components-coverage GROUP=$$(basename $$component_group) ; \ - done +test: + @if [ -n "${GROUP}" ]; then \ + echo "Test components under components/${GROUP}" && \ + cd components/${GROUP} && \ + poetry run pytest ; \ + else \ + echo "Testing scripts" && \ + cd pipelines && \ + poetry run python -m pytest tests/trigger &&\ + cd .. && \ + for i in components/*/ ; do \ + echo "Test components under $$i" && \ + cd "$$i" && \ + poetry run pytest && \ + cd ../.. ;\ + done ; \ + fi -run: ## Compile pipeline and run pipeline in sandbox environment. Must specify pipeline=. Optionally specify enable_pipeline_caching= (defaults to default Vertex caching behaviour) - @ $(MAKE) compile-pipeline && \ - cd pipelines/src && \ - poetry run python -m pipelines.trigger --template_path=pipelines/${pipeline}/pipeline.yaml --enable_caching=$(enable_pipeline_caching) e2e-tests: ## Perform end-to-end (E2E) pipeline tests. Must specify pipeline=. Optionally specify enable_pipeline_caching= (defaults to default Vertex caching behaviour). @ cd pipelines && \ poetry run pytest --log-cli-level=INFO tests/$(pipeline) --enable_caching=$(enable_pipeline_caching) - -env ?= dev -deploy-infra: ## Deploy the Terraform infrastructure to your project. Requires VERTEX_PROJECT_ID and VERTEX_LOCATION env variables to be set in env.sh. Optionally specify env= (default = dev) - @ cd terraform/envs/$(env) && \ - terraform init -backend-config='bucket=${VERTEX_PROJECT_ID}-tfstate' && \ - terraform apply -var 'project_id=${VERTEX_PROJECT_ID}' -var 'region=${VERTEX_LOCATION}' - -destroy-infra: ## DESTROY the Terraform infrastructure in your project. Requires VERTEX_PROJECT_ID and VERTEX_LOCATION env variables to be set in env.sh. Optionally specify env= (default = dev) - @ cd terraform/envs/$(env) && \ - terraform init -backend-config='bucket=${VERTEX_PROJECT_ID}-tfstate' && \ - terraform destroy -var 'project_id=${VERTEX_PROJECT_ID}' -var 'region=${VERTEX_LOCATION}' - -target ?= training -build-container: ## Build and push training/serving container image using Docker. Specify target= - @ cd model && \ - gcloud builds submit . \ - --region=${VERTEX_LOCATION} \ - --project=${VERTEX_PROJECT_ID} \ - --gcs-source-staging-dir=gs://${VERTEX_PROJECT_ID}-staging/source \ - --substitutions=_DOCKER_TARGET=${target},_DESTINATION_IMAGE_URI=${CONTAINER_IMAGE_REGISTRY}/${target}:${RESOURCE_SUFFIX} From 4161e4e20361000ace88dd0da525da0b3bf64795 Mon Sep 17 00:00:00 2001 From: roberta-dt Date: Thu, 17 Aug 2023 13:06:22 +0100 Subject: [PATCH 02/11] build(cloudbuild): update make commands --- cloudbuild/e2e-test.yaml | 2 +- cloudbuild/pr-checks.yaml | 9 ++++----- cloudbuild/release.yaml | 6 +++--- cloudbuild/trigger-tests.yaml | 4 ++-- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/cloudbuild/e2e-test.yaml b/cloudbuild/e2e-test.yaml index b6724cec..6576f750 100644 --- a/cloudbuild/e2e-test.yaml +++ b/cloudbuild/e2e-test.yaml @@ -44,7 +44,7 @@ steps: - | curl -sSL https://install.python-poetry.org | python3 - && \ export PATH="/builder/home/.local/bin:$$PATH" && \ - make setup && \ + make install && \ make e2e-tests pipeline=training enable_pipeline_caching=False && \ make e2e-tests pipeline=prediction enable_pipeline_caching=False env: diff --git a/cloudbuild/pr-checks.yaml b/cloudbuild/pr-checks.yaml index a7819caa..085a1324 100644 --- a/cloudbuild/pr-checks.yaml +++ b/cloudbuild/pr-checks.yaml @@ -24,14 +24,13 @@ steps: - | curl -sSL https://install.python-poetry.org | python3 - && \ export PATH="/builder/home/.local/bin:$$PATH" && \ - make setup && \ + make install && \ git init && \ git add . && \ make pre-commit && \ - make compile-pipeline pipeline=training && \ - make compile-pipeline pipeline=prediction && \ - make setup-all-components && \ - make test-all-components + make compile pipeline=training && \ + make compile pipeline=prediction && \ + make test env: - SKIP=terraform-fmt,git-dirty - CONTAINER_IMAGE_REGISTRY=dummy_value diff --git a/cloudbuild/release.yaml b/cloudbuild/release.yaml index c7e514b3..fc20bc75 100644 --- a/cloudbuild/release.yaml +++ b/cloudbuild/release.yaml @@ -39,11 +39,11 @@ steps: - | curl -sSL https://install.python-poetry.org | python3 - && \ export PATH="/builder/home/.local/bin:$$PATH" && \ - make setup && \ + make install && \ for proj in ${_DESTINATION_PROJECTS} ; do \ CONTAINER_IMAGE_REGISTRY=${_VERTEX_LOCATION}-docker.pkg.dev/$$proj/vertex-images \ - make compile-pipeline pipeline=training && \ - make compile-pipeline pipeline=prediction && \ + make compile pipeline=training && \ + make compile pipeline=prediction && \ cd pipelines && \ poetry run python -m pipelines.utils.upload_pipeline \ --dest=https://${_VERTEX_LOCATION}-kfp.pkg.dev/$$proj/vertex-pipelines \ diff --git a/cloudbuild/trigger-tests.yaml b/cloudbuild/trigger-tests.yaml index 4778396c..d1864221 100644 --- a/cloudbuild/trigger-tests.yaml +++ b/cloudbuild/trigger-tests.yaml @@ -19,8 +19,8 @@ steps: - | curl -sSL https://install.python-poetry.org | python3 - && \ export PATH="/builder/home/.local/bin:$$PATH" && \ - make setup && \ - make test-trigger + make install && \ + make test entrypoint: /bin/sh options: From 9cf60f54573bf9419cbc63d4e465662b13636417 Mon Sep 17 00:00:00 2001 From: roberta-dt Date: Mon, 21 Aug 2023 12:43:09 +0100 Subject: [PATCH 03/11] refactor(Makefile): fix targets list with training and serving commands --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6c20a715..93ebe9b4 100644 --- a/Makefile +++ b/Makefile @@ -50,7 +50,7 @@ compile: ## Compile the pipeline to pipeline.yaml. Must specify pipeline= @cd model && \ for target in $$targets ; do \ From b45c50aaf2498c051e1891cea251c3d6db088534 Mon Sep 17 00:00:00 2001 From: roberta-dt Date: Mon, 21 Aug 2023 12:53:37 +0100 Subject: [PATCH 04/11] docs(Makefile): add and update doc strings --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 93ebe9b4..e52f7590 100644 --- a/Makefile +++ b/Makefile @@ -51,7 +51,7 @@ compile: ## Compile the pipeline to pipeline.yaml. Must specify pipeline= +build: ## Build and push training and/or serving container(s) image using Docker. Specify targets= e.g. targets=training or targets=training serving (default) @cd model && \ for target in $$targets ; do \ echo "Building $$target image" && \ @@ -76,7 +76,7 @@ run: ## Compile pipeline and run pipeline in sandbox environment. Must specify p poetry run python -m pipelines.trigger --template_path=pipelines/${pipeline}/pipeline.yaml --enable_caching=$(enable_pipeline_caching) -test: +test: ## Run unit tests for a component group or for all component groups and the pipeline trigger code. @if [ -n "${GROUP}" ]; then \ echo "Test components under components/${GROUP}" && \ cd components/${GROUP} && \ From 5fb0503599d831520a4f8ac3fe0bfd0f721d3e97 Mon Sep 17 00:00:00 2001 From: roberta-dt Date: Tue, 22 Aug 2023 01:00:40 +0100 Subject: [PATCH 05/11] refactor(Makefile): add doc string to the run command --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e52f7590..f0986df2 100644 --- a/Makefile +++ b/Makefile @@ -65,7 +65,7 @@ build: ## Build and push training and/or serving container(s) image using Docker compile ?=true build ?= true -run: ## Compile pipeline and run pipeline in sandbox environment. Must specify pipeline=. Optionally specify enable_pipeline_caching= (defaults to default Vertex caching behaviour) +run: ## Compile or build pipeline and run pipeline in sandbox environment. Compile and build set to true by default @if [ "${compile}" ]; then \ $(MAKE) compile ; \ fi && \ From 38c94d99b25351ad5423e930ef9e36585ab392d2 Mon Sep 17 00:00:00 2001 From: roberta-dt Date: Tue, 22 Aug 2023 01:11:21 +0100 Subject: [PATCH 06/11] refactor: fix rebase issues --- Makefile | 57 +------------------------------------------------------- 1 file changed, 1 insertion(+), 56 deletions(-) diff --git a/Makefile b/Makefile index e5e9c49b..f0986df2 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC +# Copyright 2022 Google LLC # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -23,61 +23,6 @@ pre-commit: ## Runs the pre-commit checks over entire repo cd pipelines && \ poetry run pre-commit run --all-files -setup: ## Set up local environment for Python development on pipelines - @cd pipelines && \ - poetry install --with dev - -test-utils: ## Runs unit tests for the util scripts - @cd pipelines && \ - poetry run python -m pytest tests/utils - -compile-pipeline: ## Compile the pipeline to pipeline.yaml. Must specify pipeline= - @cd pipelines/src && \ - poetry run kfp dsl compile --py pipelines/${pipeline}/pipeline.py --output pipelines/${pipeline}/pipeline.yaml --function pipeline - -setup-components: ## Run unit tests for a component group - @cd "components/${GROUP}" && \ - poetry install --with dev - -setup-all-components: ## Run unit tests for all pipeline components - @set -e && \ - for component_group in components/*/ ; do \ - echo "Setup components under $$component_group" && \ - $(MAKE) setup-components GROUP=$$(basename $$component_group) ; \ - done - -test-components: ## Run unit tests for a component group - @cd "components/${GROUP}" && \ - poetry run pytest - -test-all-components: ## Run unit tests for all pipeline components - @set -e && \ - for component_group in components/*/ ; do \ - echo "Test components under $$component_group" && \ - $(MAKE) test-components GROUP=$$(basename $$component_group) ; \ - done - -test-components-coverage: ## Run tests with coverage - @cd "components/${GROUP}" && \ - poetry run coverage run -m pytest && \ - poetry run coverage report -m - -test-all-components-coverage: ## Run tests with coverage - @set -e && \ - for component_group in components/*/ ; do \ - echo "Test components under $$component_group" && \ - $(MAKE) test-components-coverage GROUP=$$(basename $$component_group) ; \ - done - -run: ## Compile pipeline and run pipeline in sandbox environment. Must specify pipeline=. Optionally specify ENABLE_PIPELINE_CACHING= (defaults to default Vertex caching behaviour) - @ $(MAKE) compile-pipeline && \ - cd pipelines/src && \ - poetry run python -m pipelines.utils.trigger_pipeline --template_path=pipelines/${pipeline}/pipeline.yaml --display_name=${pipeline} - -e2e-tests: ## Perform end-to-end (E2E) pipeline tests. Must specify pipeline=. Optionally specify ENABLE_PIPELINE_CACHING= (defaults to default Vertex caching behaviour). - @ cd pipelines && \ - poetry run pytest --log-cli-level=INFO tests/$(pipeline) - env ?= dev deploy: ## Deploy the Terraform infrastructure to your project. Requires VERTEX_PROJECT_ID and VERTEX_LOCATION env variables to be set in env.sh. Optionally specify env= (default = dev) @ cd terraform/envs/$(env) && \ From 3bb06ff54463749409cc81d4be444d881575fc13 Mon Sep 17 00:00:00 2001 From: roberta-dt Date: Tue, 22 Aug 2023 12:15:16 +0100 Subject: [PATCH 07/11] build: update make commands in cloudbuild files --- Makefile | 10 +++++----- cloudbuild/e2e-test.yaml | 2 +- cloudbuild/pr-checks.yaml | 8 +++----- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index f0986df2..993e76cd 100644 --- a/Makefile +++ b/Makefile @@ -65,7 +65,7 @@ build: ## Build and push training and/or serving container(s) image using Docker compile ?=true build ?= true -run: ## Compile or build pipeline and run pipeline in sandbox environment. Compile and build set to true by default +run: ## Compile or build pipeline and run pipeline in sandbox environment. Set compile=false to skip recompiling the pipeline and set build=false to skip rebuilding container images @if [ "${compile}" ]; then \ $(MAKE) compile ; \ fi && \ @@ -73,7 +73,7 @@ run: ## Compile or build pipeline and run pipeline in sandbox environment. Compi $(MAKE) build ; \ fi && \ cd pipelines/src \ - poetry run python -m pipelines.trigger --template_path=pipelines/${pipeline}/pipeline.yaml --enable_caching=$(enable_pipeline_caching) + poetry run python -m pipelines.utils.trigger_pipeline --template_path=pipelines/${pipeline}/pipeline.yaml --display_name=${pipeline} test: ## Run unit tests for a component group or for all component groups and the pipeline trigger code. @@ -84,7 +84,7 @@ test: ## Run unit tests for a component group or for all component groups and th else \ echo "Testing scripts" && \ cd pipelines && \ - poetry run python -m pytest tests/trigger &&\ + poetry run python -m pytest tests/utils &&\ cd .. && \ for i in components/*/ ; do \ echo "Test components under $$i" && \ @@ -95,6 +95,6 @@ test: ## Run unit tests for a component group or for all component groups and th fi -e2e-tests: ## Perform end-to-end (E2E) pipeline tests. Must specify pipeline=. Optionally specify enable_pipeline_caching= (defaults to default Vertex caching behaviour). +e2e-tests: ## Perform end-to-end (E2E) pipeline tests. Must specify pipeline=. Optionally specify ENABLE_PIPELINE_CACHING= (defaults to default Vertex caching behaviour). @ cd pipelines && \ - poetry run pytest --log-cli-level=INFO tests/$(pipeline) --enable_caching=$(enable_pipeline_caching) + poetry run pytest --log-cli-level=INFO tests/$(pipeline) diff --git a/cloudbuild/e2e-test.yaml b/cloudbuild/e2e-test.yaml index c2e77696..e225fcb9 100644 --- a/cloudbuild/e2e-test.yaml +++ b/cloudbuild/e2e-test.yaml @@ -44,7 +44,7 @@ steps: - | curl -sSL https://install.python-poetry.org | python3 - && \ export PATH="/builder/home/.local/bin:$$PATH" && \ - make setup && \ + make install && \ make e2e-tests pipeline=training && \ make e2e-tests pipeline=prediction env: diff --git a/cloudbuild/pr-checks.yaml b/cloudbuild/pr-checks.yaml index 40c1b9f3..085a1324 100644 --- a/cloudbuild/pr-checks.yaml +++ b/cloudbuild/pr-checks.yaml @@ -28,11 +28,9 @@ steps: git init && \ git add . && \ make pre-commit && \ - make test-utils && \ - make compile-pipeline pipeline=training && \ - make compile-pipeline pipeline=prediction && \ - make setup-all-components && \ - make test-all-components + make compile pipeline=training && \ + make compile pipeline=prediction && \ + make test env: - SKIP=terraform-fmt,git-dirty - CONTAINER_IMAGE_REGISTRY=dummy_value From 79d5f9929db8a157732b7b6c8ea08d3f59050542 Mon Sep 17 00:00:00 2001 From: roberta-dt Date: Wed, 23 Aug 2023 11:31:46 +0100 Subject: [PATCH 08/11] style: change copyright to 2023 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 993e76cd..fd0be165 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# Copyright 2022 Google LLC +# Copyright 2023 Google LLC # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From e088d52b041d8b07cbb0f2f15a544059dcbbc386 Mon Sep 17 00:00:00 2001 From: roberta-dt Date: Wed, 23 Aug 2023 11:52:27 +0100 Subject: [PATCH 09/11] style: add quotes to doc strings --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fd0be165..a330fd00 100644 --- a/Makefile +++ b/Makefile @@ -51,7 +51,7 @@ compile: ## Compile the pipeline to pipeline.yaml. Must specify pipeline= e.g. targets=training or targets=training serving (default) +build: ## Build and push training and/or serving container(s) image using Docker. Specify targets= e.g. targets=training or targets="training serving" (default) @cd model && \ for target in $$targets ; do \ echo "Building $$target image" && \ From 9dc734ebc01cb12f9310a87940099bd127dbaaab Mon Sep 17 00:00:00 2001 From: roberta-dt Date: Wed, 23 Aug 2023 15:11:12 +0100 Subject: [PATCH 10/11] docs: update README with new make commands --- README.md | 38 ++++++++++++++++++-------------------- pipelines/README.md | 4 ++-- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 59169d5a..20de5baf 100644 --- a/README.md +++ b/README.md @@ -70,10 +70,9 @@ How to deploy this infrastructure is covered in a [later section](#deploying-inf 1. Install the correct Python version: `pyenv install` 1. Install poetry - follow the instructions in the [poetry documentation](https://python-poetry.org/docs/#installation) 1. Configure poetry to use the Python version from pyenv: `poetry config virtualenvs.prefer-active-python true` -1. Install poetry dependencies for ML pipelines: `make setup` +1. Install poetry dependencies for ML pipelines: `make install` 1. Install pre-commit hooks: `cd pipelines && poetry run pre-commit install` 1. Copy `env.sh.example` to `env.sh`, and update the environment variables in `env.sh` for your dev environment (particularly `VERTEX_PROJECT_ID`, `VERTEX_LOCATION` and `RESOURCE_SUFFIX`) -1. (Optional) If you want to make changes to the KFP components under [/components](/components/), set up the Python virtual environments for these by running `make setup-all-components` 1. Authenticate to Google Cloud 1. `gcloud auth login` 1. `gcloud auth application-default login` @@ -111,9 +110,9 @@ Install Terraform on your local machine. We recommend using [`tfswitch`](https:/ Now you can deploy the infrastructure using Terraform: ```bash -make deploy-infra env=dev VERTEX_PROJECT_ID= -make deploy-infra env=test VERTEX_PROJECT_ID= -make deploy-infra env=prod VERTEX_PROJECT_ID= +make deploy env=dev VERTEX_PROJECT_ID= +make deploy env=test VERTEX_PROJECT_ID= +make deploy env=prod VERTEX_PROJECT_ID= ``` #### Optional - Tearing down infrastructure @@ -121,9 +120,9 @@ make deploy-infra env=prod VERTEX_PROJECT_ID= To tear down the infrastructure you have created with Terraform, run these commands: ```bash -make destroy-infra env=dev VERTEX_PROJECT_ID= -make destroy-infra env=test VERTEX_PROJECT_ID= -make destroy-infra env=prod VERTEX_PROJECT_ID= +make undeploy env=dev VERTEX_PROJECT_ID= +make undeploy env=test VERTEX_PROJECT_ID= +make undeploy env=prod VERTEX_PROJECT_ID= ``` ### Example ML pipelines @@ -161,16 +160,16 @@ bq mk --transfer_config \ The [model/](/model/) directory contains the code for custom training and serving container images, including the model training script at [model/training/train.py](model/training/train.py). You can modify this to suit your own use case. -Build the training container image and push it to Artifact Registry with: +Build the training and serving container image and push to Artifact Registry with: ```bash -make build-container target=training +make build ``` -Do the same for the serving container image: +Do this to only build one image (for example): ```bash -make build-container target=serving +make build target=serving ``` ### Running Pipelines @@ -178,7 +177,7 @@ make build-container target=serving You can run the training pipeline (for example) with: ```bash -make run pipeline=training +make run pipeline=training ``` This will execute the pipeline using the chosen template on Vertex AI, namely it will: @@ -198,26 +197,25 @@ Unit tests and end-to-end (E2E) pipeline tests are performed using [pytest](http The unit tests for custom KFP components are run on each pull request, as well as the E2E tests. To run them on your local machine: ``` -make setup-all-components -make test-all-components +make install +make test ``` -Alternatively, only setup and install one of the component groups by running: +Alternatively, only test one of the component groups by running: ``` -make setup-components GROUP=vertex-components -make test-components GROUP=vertex-components +make test GROUP=vertex-components ``` To run end-to-end tests of a single pipeline, you can use: ``` -make e2e-tests pipeline= [ enable_caching= ] +make e2e-tests pipeline= ``` There are also unit tests for the utility scripts in [pipelines/src/pipelines/utils](/pipelines/src/pipelines/utils/). To run them on your local machine: ``` -make test-utils +make test ``` ## Customize pipelines diff --git a/pipelines/README.md b/pipelines/README.md index ec328036..dccf64be 100644 --- a/pipelines/README.md +++ b/pipelines/README.md @@ -49,7 +49,7 @@ This step is performed using a custom KFP component located in [components/bigqu The training step is defined as a [KFP container component](https://www.kubeflow.org/docs/components/pipelines/v2/components/container-components/) in the [pipeline.py](/pipelines/src/pipelines/training/pipeline.py) file. -The container image used for this component is built using CI/CD (or the `make build-container target=training` command if you want to build it during development). +The container image used for this component is built using CI/CD (or the `make build target=training` command if you want to build it during development). The source code for this container image (and the serving container image) can be found in the [model](/model/) directory. Dependencies are managed using Poetry. The model training script can be found at [model/training/train.py](/model/training/train.py) and can be modified to suit your use case. @@ -115,5 +115,5 @@ If the component is exactly the same and the arguments are exactly the same as i Since most of the ML projects take a long time and expensive computation resources, it is cost-effective to use cache when you are sure that the output of components is correct. In terms of [how to control cache reuse behavior](https://cloud.google.com/vertex-ai/docs/pipelines/configure-caching), in generally, you can do it for either a component or the entire pipeline (for all components). If you want to control caching behavior for individual components, add `.set_caching_options()` after each component when building a pipeline. -To change the caching behaviour of ALL components within a pipeline, you can specify this when you trigger the pipeline like so: `make run pipeline= enable_caching=` +To change the caching behaviour of ALL components within a pipeline, you can specify this when you trigger the pipeline like so: `make run pipeline=` It is suggested to start by disabling caching of components during development, until you have a good idea of how the caching behaviour works, as it can lead to unexpected results. From 9819d0236d875d3714a17396e681c60df061086b Mon Sep 17 00:00:00 2001 From: Jonny Browning Date: Wed, 23 Aug 2023 15:22:46 +0100 Subject: [PATCH 11/11] docs: README tweaks --- README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 20de5baf..e2c54ab8 100644 --- a/README.md +++ b/README.md @@ -160,13 +160,13 @@ bq mk --transfer_config \ The [model/](/model/) directory contains the code for custom training and serving container images, including the model training script at [model/training/train.py](model/training/train.py). You can modify this to suit your own use case. -Build the training and serving container image and push to Artifact Registry with: +Build the training and serving container images and push them to Artifact Registry with: ```bash make build ``` -Do this to only build one image (for example): +Optionally specify the `target` variable to only build one of the images. For example, to build only the serving image: ```bash make build target=serving @@ -177,7 +177,7 @@ make build target=serving You can run the training pipeline (for example) with: ```bash -make run pipeline=training +make run pipeline=training ``` This will execute the pipeline using the chosen template on Vertex AI, namely it will: @@ -197,7 +197,6 @@ Unit tests and end-to-end (E2E) pipeline tests are performed using [pytest](http The unit tests for custom KFP components are run on each pull request, as well as the E2E tests. To run them on your local machine: ``` -make install make test ```