feat: add custom serving container

Jonny Browning committed Jul 27, 2023
1 parent 5abfa36 commit 15b4c98
Showing 15 changed files with 1,065 additions and 295 deletions.
15 changes: 13 additions & 2 deletions Makefile
@@ -92,10 +92,21 @@ destroy-infra: ## DESTROY the Terraform infrastructure in your project. Requires
 	terraform destroy -var 'project_id=${VERTEX_PROJECT_ID}' -var 'region=${VERTEX_LOCATION}'
 
 build-training-container: ## Build and push training container image using Docker
-	@ cd training && \
-	poetry export -f requirements.txt -o requirements.txt && \
+	@ cd model && \
+	poetry export -f requirements.txt -o training/requirements.txt && \
+	cd training && \
 	gcloud builds submit . \
 	--tag=${TRAINING_CONTAINER_IMAGE} \
 	--region=${VERTEX_LOCATION} \
 	--project=${VERTEX_PROJECT_ID} \
 	--gcs-source-staging-dir=gs://${VERTEX_PROJECT_ID}-staging/source
+
+build-serving-container: ## Build and push serving container image using Docker
+	@ cd model && \
+	poetry export --with serving -f requirements.txt -o serving/requirements.txt && \
+	cd serving && \
+	gcloud builds submit . \
+	--tag=${SERVING_CONTAINER_IMAGE} \
+	--region=${VERTEX_LOCATION} \
+	--project=${VERTEX_PROJECT_ID} \
+	--gcs-source-staging-dir=gs://${VERTEX_PROJECT_ID}-staging/source
@@ -193,6 +193,8 @@ def import_evaluation(
         display_name=model_name,
         artifact_uri=model.uri,
         serving_container_image_uri=serving_container_image,
+        serving_container_predict_route="/predict",
+        serving_container_health_route="/healthz",
         parent_model=(
             champion_model.resource_name if champion_model is not None else None
         ),
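The two new routes must match the endpoints exposed by the custom serving app (see model/serving/main.py below). These kwargs flow through to google.cloud.aiplatform Model.upload; the following is a minimal hedged sketch of the equivalent direct call, not the component's actual code, with the display name, URIs and image path all illustrative:

from google.cloud import aiplatform

# Hedged sketch: registering a model that is served by the custom container.
# All values below are illustrative placeholders.
model = aiplatform.Model.upload(
    display_name="my-model",
    artifact_uri="gs://my-bucket/model",  # prefix expected to contain model.joblib
    serving_container_image_uri="europe-west2-docker.pkg.dev/my-project/vertex-images/serving:default",
    serving_container_predict_route="/predict",  # must match the FastAPI app's routes
    serving_container_health_route="/healthz",
)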
1 change: 1 addition & 0 deletions env.sh.example
@@ -25,3 +25,4 @@ export RESOURCE_SUFFIX=default
 export VERTEX_SA_EMAIL=vertex-pipelines@${VERTEX_PROJECT_ID}.iam.gserviceaccount.com
 export VERTEX_PIPELINE_ROOT=gs://${VERTEX_PROJECT_ID}-pl-root
 export TRAINING_CONTAINER_IMAGE=${VERTEX_LOCATION}-docker.pkg.dev/${VERTEX_PROJECT_ID}/vertex-images/training:${RESOURCE_SUFFIX}
+export SERVING_CONTAINER_IMAGE=${VERTEX_LOCATION}-docker.pkg.dev/${VERTEX_PROJECT_ID}/vertex-images/serving:${RESOURCE_SUFFIX}
File renamed without changes.
991 changes: 991 additions & 0 deletions model/poetry.lock

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion training/pyproject.toml → model/pyproject.toml
@@ -1,5 +1,5 @@
 [tool.poetry]
-name = "training"
+name = "model"
 version = "0.1.0"
 description = ""
 authors = ["Your Name <[email protected]>"]
@@ -12,6 +12,11 @@ xgboost = "^1.7.6"
 pandas = "^2.0.3"
 
 
+[tool.poetry.group.serving.dependencies]
+fastapi = {extras = ["uvicorn"], version = "^0.100.0"}
+uvicorn = "^0.23.1"
+google-cloud-storage = "^2.10.0"
+
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
File renamed without changes.
10 changes: 10 additions & 0 deletions model/serving/Dockerfile
@@ -0,0 +1,10 @@
+FROM python:3.9.16-slim
+ENV PIP_NO_CACHE_DIR=off \
+    PIP_DISABLE_PIP_VERSION_CHECK=on \
+    PIP_DEFAULT_TIMEOUT=100
+
+COPY requirements.txt requirements.txt
+RUN pip install -r requirements.txt
+COPY main.py main.py
+
+CMD exec uvicorn main:app --host "0.0.0.0" --port "$AIP_HTTP_PORT"
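The CMD line relies on the serving contract that Vertex AI Prediction establishes through environment variables when it runs a custom container. A hedged summary sketch in Python; the fallback values mirror the defaults used by main.py below (per the Vertex AI docs, AIP_HTTP_PORT defaults to 8080):

import os

# Variables Vertex AI Prediction injects into a custom serving container:
port = os.environ.get("AIP_HTTP_PORT", "8080")            # port the server must listen on
storage_uri = os.environ.get("AIP_STORAGE_URI")           # GCS prefix holding the model artifacts
health_route = os.environ.get("AIP_HEALTH_ROUTE", "/healthz")    # health-check path
predict_route = os.environ.get("AIP_PREDICT_ROUTE", "/predict")  # prediction path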
29 changes: 29 additions & 0 deletions model/serving/main.py
@@ -0,0 +1,29 @@
+import joblib
+import os
+
+import pandas as pd
+from fastapi import FastAPI, Request
+from google.cloud import storage
+
+app = FastAPI()
+client = storage.Client()
+
+with open("model.joblib", "wb") as f:
+    client.download_blob_to_file(f"{os.environ['AIP_STORAGE_URI']}/model.joblib", f)
+_model = joblib.load("model.joblib")
+
+
+@app.get(os.environ.get("AIP_HEALTH_ROUTE", "/healthz"))
+def health():
+    return {}
+
+
+@app.post(os.environ.get("AIP_PREDICT_ROUTE", "/predict"))
+async def predict(request: Request):
+    body = await request.json()
+
+    instances = body["instances"]
+    inputs_df = pd.DataFrame(instances)
+    outputs = _model.predict(inputs_df).tolist()
+
+    return {"predictions": outputs}
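For a quick local smoke test of the new app, assuming uvicorn main:app --port 8080 is running with AIP_STORAGE_URI pointing at a GCS prefix that contains model.joblib (the feature names below are illustrative, not the pipeline's actual schema):

import requests

# Vertex-style request body: a list of instances, here JSON objects keyed by
# column name, which main.py turns into a DataFrame before predicting.
payload = {"instances": [{"trip_miles": 1.8, "payment_type": "Cash"}]}

resp = requests.post("http://localhost:8080/predict", json=payload)
print(resp.json())  # e.g. {"predictions": [12.3]}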
2 changes: 2 additions & 0 deletions model/training/.gcloudignore
@@ -0,0 +1,2 @@
+.venv
+.DS_Store
2 changes: 1 addition & 1 deletion training/Dockerfile → model/training/Dockerfile
@@ -5,4 +5,4 @@ ENV PIP_NO_CACHE_DIR=off \
 
 COPY requirements.txt requirements.txt
 RUN pip install -r requirements.txt
-COPY main.py main.py
+COPY train.py train.py
File renamed without changes.
7 changes: 5 additions & 2 deletions pipelines/src/pipelines/prediction/pipeline.py
@@ -78,8 +78,8 @@ def pipeline(
     # into different components of the pipeline
     time_column = "trip_start_timestamp"
     ingestion_table = "taxi_trips"
-    table_suffix = "_xgb_prediction" + str(resource_suffix) # suffix to table names
-    ingested_table = "ingested_data" + table_suffix
+    table_suffix = "_xgb_prediction_" + str(resource_suffix) # suffix to table names
+    ingested_table = "ingested_data_" + table_suffix
     monitoring_alert_email_addresses = []
     monitoring_skew_config = {"defaultSkewThreshold": {"value": 0.001}}
 
@@ -131,6 +131,9 @@
         destination_uri=bigquery_destination_output_uri,
         source_format="bigquery",
         destination_format="bigquery",
+        instance_config={
+            "instanceType": "object",
+        },
         machine_type=batch_prediction_machine_type,
         starting_replica_count=batch_prediction_min_replicas,
         max_replica_count=batch_prediction_max_replicas,
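Why "instanceType": "object": the batch prediction job now sends each BigQuery row as a JSON object keyed by column name, which is what pd.DataFrame(instances) in the serving app above needs to reconstruct named columns. A hedged illustration of the two encodings (field names are made up):

# "object": each instance arrives as a dict keyed by BigQuery column name.
object_instance = {"trip_miles": 1.8, "payment_type": "Cash"}

# "array": the alternative encoding sends positional values only, which would
# leave the DataFrame without column names.
array_instance = [1.8, "Cash"]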
11 changes: 5 additions & 6 deletions pipelines/src/pipelines/training/pipeline.py
@@ -22,7 +22,8 @@
 from bigquery_components import extract_bq_to_dataset
 from vertex_components import upload_model
 
-IMAGE = os.environ.get("TRAINING_CONTAINER_IMAGE")
+TRAINING_IMAGE = os.environ["TRAINING_CONTAINER_IMAGE"]
+SERVING_IMAGE = os.environ["SERVING_CONTAINER_IMAGE"]
 
 
 @dsl.container_component
@@ -36,10 +37,10 @@ def train(
     hparams: dict,
 ):
     return dsl.ContainerSpec(
-        image=IMAGE,
+        image=TRAINING_IMAGE,
         command=["python"],
         args=[
-            "main.py",
+            "train.py",
             "--train-data",
             train_data.path,
             "--valid-data",
@@ -197,9 +198,7 @@ def pipeline(
         eval_metric=primary_metric,
         eval_lower_is_better=True,
         model=train_model.outputs["model"],
-        serving_container_image=(
-            "europe-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.0-24:latest"
-        ),
+        serving_container_image=SERVING_IMAGE,
         model_name=model_name,
         pipeline_job_id="{{$.pipeline_job_name}}",
         test_dataset=test_dataset,
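One behavioural note on the switch from os.environ.get(...) to os.environ[...]: pipeline compilation now fails fast with a KeyError when an image variable is unset, instead of silently passing image=None. A one-line illustration:

import os
os.environ["SERVING_CONTAINER_IMAGE"]  # raises KeyError if env.sh was not sourced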