From d51ac9c045c5e3152b2ccb7ee901cef6863c9a03 Mon Sep 17 00:00:00 2001
From: Jonny Browning
Date: Thu, 27 Jul 2023 10:01:58 +0100
Subject: [PATCH] consolidate training and serving image builds

---
 Makefile                                     | 19 ++++------------
 env.sh.example                               |  3 +--
 model/{serving => }/.gcloudignore            |  0
 model/.gitignore                             |  1 -
 model/Dockerfile                             | 24 ++++++++++++++++++++
 model/cloudbuild.yaml                        |  7 ++++++
 model/serving/Dockerfile                     | 10 --------
 model/training/.gcloudignore                 |  2 --
 model/training/Dockerfile                    |  8 -------
 pipelines/src/pipelines/training/pipeline.py |  8 ++++---
 10 files changed, 41 insertions(+), 41 deletions(-)
 rename model/{serving => }/.gcloudignore (100%)
 delete mode 100644 model/.gitignore
 create mode 100644 model/Dockerfile
 create mode 100644 model/cloudbuild.yaml
 delete mode 100644 model/serving/Dockerfile
 delete mode 100644 model/training/.gcloudignore
 delete mode 100644 model/training/Dockerfile

diff --git a/Makefile b/Makefile
index ef889513..72adc2a1 100644
--- a/Makefile
+++ b/Makefile
@@ -91,22 +91,11 @@ destroy-infra: ## DESTROY the Terraform infrastructure in your project. Requires
 	terraform init -backend-config='bucket=${VERTEX_PROJECT_ID}-tfstate' && \
 	terraform destroy -var 'project_id=${VERTEX_PROJECT_ID}' -var 'region=${VERTEX_LOCATION}'
 
-build-training-container: ## Build and push training container image using Docker
+target ?= training
+build-container: ## Build and push training/serving container image using Docker. Specify target=<training|serving>
 	@ cd model && \
-	poetry export -f requirements.txt -o training/requirements.txt && \
-	cd training && \
 	gcloud builds submit . \
-	--tag=${TRAINING_CONTAINER_IMAGE} \
 	--region=${VERTEX_LOCATION} \
 	--project=${VERTEX_PROJECT_ID} \
-	--gcs-source-staging-dir=gs://${VERTEX_PROJECT_ID}-staging/source
-
-build-serving-container: ## Build and push serving container image using Docker
-	@ cd model && \
-	poetry export --with serving -f requirements.txt -o serving/requirements.txt && \
-	cd serving && \
-	gcloud builds submit . \
-	--tag=${SERVING_CONTAINER_IMAGE} \
-	--region=${VERTEX_LOCATION} \
-	--project=${VERTEX_PROJECT_ID} \
-	--gcs-source-staging-dir=gs://${VERTEX_PROJECT_ID}-staging/source
+	--gcs-source-staging-dir=gs://${VERTEX_PROJECT_ID}-staging/source \
+	--substitutions=_DOCKER_TARGET=${target},_DESTINATION_IMAGE_URI=${CONTAINER_IMAGE_REGISTRY}/${target}:${RESOURCE_SUFFIX}
diff --git a/env.sh.example b/env.sh.example
index eedd1308..63152c5c 100644
--- a/env.sh.example
+++ b/env.sh.example
@@ -24,5 +24,4 @@ export RESOURCE_SUFFIX=default
 # Leave as-is
 export VERTEX_SA_EMAIL=vertex-pipelines@${VERTEX_PROJECT_ID}.iam.gserviceaccount.com
 export VERTEX_PIPELINE_ROOT=gs://${VERTEX_PROJECT_ID}-pl-root
-export TRAINING_CONTAINER_IMAGE=${VERTEX_LOCATION}-docker.pkg.dev/${VERTEX_PROJECT_ID}/vertex-images/training:${RESOURCE_SUFFIX}
-export SERVING_CONTAINER_IMAGE=${VERTEX_LOCATION}-docker.pkg.dev/${VERTEX_PROJECT_ID}/vertex-images/serving:${RESOURCE_SUFFIX}
+export CONTAINER_IMAGE_REGISTRY=${VERTEX_LOCATION}-docker.pkg.dev/${VERTEX_PROJECT_ID}/vertex-images
diff --git a/model/serving/.gcloudignore b/model/.gcloudignore
similarity index 100%
rename from model/serving/.gcloudignore
rename to model/.gcloudignore
diff --git a/model/.gitignore b/model/.gitignore
deleted file mode 100644
index 4414fc1e..00000000
--- a/model/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-requirements.txt
diff --git a/model/Dockerfile b/model/Dockerfile
new file mode 100644
index 00000000..fb94433e
--- /dev/null
+++ b/model/Dockerfile
@@ -0,0 +1,24 @@
+FROM python:3.9.16-slim AS builder
+
+ENV PIP_NO_CACHE_DIR=off \
+    PIP_DISABLE_PIP_VERSION_CHECK=on \
+    PIP_DEFAULT_TIMEOUT=100
+
+ARG POETRY_VERSION=1.5.1
+
+COPY pyproject.toml pyproject.toml
+COPY poetry.lock poetry.lock
+
+RUN pip install poetry==${POETRY_VERSION}
+RUN poetry install
+
+FROM builder AS training
+
+COPY training/train.py training/train.py
+
+FROM builder AS serving
+
+RUN poetry install --with serving
+COPY serving/main.py serving/main.py
+
+CMD exec uvicorn serving.main:app --host "0.0.0.0" --port "$AIP_HTTP_PORT"
diff --git a/model/cloudbuild.yaml b/model/cloudbuild.yaml
new file mode 100644
index 00000000..167ceb0f
--- /dev/null
+++ b/model/cloudbuild.yaml
@@ -0,0 +1,7 @@
+---
+steps:
+  - name: 'gcr.io/kaniko-project/executor:latest'
+    args:
+      - --destination=${_DESTINATION_IMAGE_URI}
+      - --target=${_DOCKER_TARGET}
+      - --cache=true
diff --git a/model/serving/Dockerfile b/model/serving/Dockerfile
deleted file mode 100644
index 71207403..00000000
--- a/model/serving/Dockerfile
+++ /dev/null
@@ -1,10 +0,0 @@
-FROM python:3.9.16-slim
-ENV PIP_NO_CACHE_DIR=off \
-    PIP_DISABLE_PIP_VERSION_CHECK=on \
-    PIP_DEFAULT_TIMEOUT=100
-
-COPY requirements.txt requirements.txt
-RUN pip install -r requirements.txt
-COPY main.py main.py
-
-CMD exec uvicorn main:app --host "0.0.0.0" --port "$AIP_HTTP_PORT"
diff --git a/model/training/.gcloudignore b/model/training/.gcloudignore
deleted file mode 100644
index 510bf9ce..00000000
--- a/model/training/.gcloudignore
+++ /dev/null
@@ -1,2 +0,0 @@
-.venv
-.DS_Store
diff --git a/model/training/Dockerfile b/model/training/Dockerfile
deleted file mode 100644
index 4b4effce..00000000
--- a/model/training/Dockerfile
+++ /dev/null
@@ -1,8 +0,0 @@
-FROM python:3.9.16-slim
-ENV PIP_NO_CACHE_DIR=off \
-    PIP_DISABLE_PIP_VERSION_CHECK=on \
-    PIP_DEFAULT_TIMEOUT=100
-
-COPY requirements.txt requirements.txt
-RUN pip install -r requirements.txt
-COPY train.py train.py
diff --git a/pipelines/src/pipelines/training/pipeline.py b/pipelines/src/pipelines/training/pipeline.py
index 3e2c3ed4..a2906e44 100644
--- a/pipelines/src/pipelines/training/pipeline.py
+++ b/pipelines/src/pipelines/training/pipeline.py
@@ -22,8 +22,10 @@
 from bigquery_components import extract_bq_to_dataset
 from vertex_components import upload_model
 
-TRAINING_IMAGE = os.environ["TRAINING_CONTAINER_IMAGE"]
-SERVING_IMAGE = os.environ["SERVING_CONTAINER_IMAGE"]
+CONTAINER_IMAGE_REGISTRY = os.environ["CONTAINER_IMAGE_REGISTRY"]
+RESOURCE_SUFFIX = os.environ.get("RESOURCE_SUFFIX", "default")
+TRAINING_IMAGE = f"{CONTAINER_IMAGE_REGISTRY}/training:{RESOURCE_SUFFIX}"
+SERVING_IMAGE = f"{CONTAINER_IMAGE_REGISTRY}/serving:{RESOURCE_SUFFIX}"
 
 
 @dsl.container_component
@@ -40,7 +42,7 @@ def train(
         image=TRAINING_IMAGE,
         command=["python"],
         args=[
-            "train.py",
+            "training/train.py",
             "--train-data",
             train_data.path,
             "--valid-data",