feat: Added the PipelineJob.from_pipeline_func method (googleapis#1415)
The new factory method reduces the pipeline submission boilerplate to the absolute minimum:

```python
aiplatform.PipelineJob.from_pipeline_func(training_pipeline).submit()
```

What it does:

1. Compiles the pipeline
2. Provides sensible default values for the pipeline display name, job_id, context, etc.
3. Generates a GCS directory for the pipeline output artifacts if needed
4. Creates the GCS bucket for the artifacts if it does not exist (and gives the Pipelines service account the required permissions)

Example usage:

```python
def training_pipeline(number_of_epochs: int = 10):
    train_op(
        number_of_epochs=number_of_epochs,
        learning_rate="0.1",
    )

job = aiplatform.PipelineJob.from_pipeline_func(training_pipeline)
job.submit()
```

---

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:

- [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-aiplatform/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea
- [x] Ensure the tests and linter pass
- [x] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
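For context, a slightly fuller call can pass explicit values instead of relying on the generated defaults. This is a hedged sketch: the `parameter_values` and `output_artifacts_gcs_dir` keyword names are assumptions inferred from this commit's helper functions rather than confirmed by the message above, and the bucket path is a placeholder.

```python
from google.cloud import aiplatform

job = aiplatform.PipelineJob.from_pipeline_func(
    pipeline_func=training_pipeline,
    # Assumed keyword names (not shown verbatim in this commit message):
    parameter_values={"number_of_epochs": 20},
    output_artifacts_gcs_dir="gs://my-bucket/output_artifacts/",  # placeholder bucket
)
job.submit()
```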
1 parent 4ef99e9 · commit 4b06384

Showing 5 changed files with 372 additions and 4 deletions.
```diff
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-# Copyright 2021 Google LLC
+# Copyright 2022 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -25,7 +25,7 @@
 from google.cloud import storage

 from google.cloud.aiplatform import initializer
+from google.cloud.aiplatform.utils import resource_manager_utils

 _logger = logging.getLogger(__name__)

@@ -163,3 +163,103 @@ def stage_local_data_in_gcs(
     )

     return staged_data_uri
+
+
+def generate_gcs_directory_for_pipeline_artifacts(
+    project: Optional[str] = None,
+    location: Optional[str] = None,
+):
+    """Generates the GCS directory URI for Vertex Pipelines output artifacts.
+
+    Args:
+        project: Optional. Google Cloud Project that contains the staging bucket.
+        location: Optional. Google Cloud location to use for the staging bucket.
+
+    Returns:
+        Google Cloud Storage URI of the directory for pipeline output artifacts.
+    """
+    project = project or initializer.global_config.project
+    location = location or initializer.global_config.location
+
+    pipelines_bucket_name = project + "-vertex-pipelines-" + location
+    output_artifacts_gcs_dir = "gs://" + pipelines_bucket_name + "/output_artifacts/"
+    return output_artifacts_gcs_dir
+
+
+def create_gcs_bucket_for_pipeline_artifacts_if_it_does_not_exist(
+    output_artifacts_gcs_dir: Optional[str] = None,
+    service_account: Optional[str] = None,
+    project: Optional[str] = None,
+    location: Optional[str] = None,
+    credentials: Optional[auth_credentials.Credentials] = None,
+):
+    """Creates the GCS bucket for Vertex Pipelines artifacts if it does not exist.
+
+    Args:
+        output_artifacts_gcs_dir: Optional. The GCS location for the pipeline outputs.
+            It will be generated if not specified.
+        service_account: Optional. Google Cloud service account that will be used
+            to run the pipelines. If this function creates a new bucket it will give
+            permission to the specified service account to access the bucket.
+            If not provided, the Google Cloud Compute Engine service account will be used.
+        project: Optional. Google Cloud Project that contains the staging bucket.
+        location: Optional. Google Cloud location to use for the staging bucket.
+        credentials: The custom credentials to use when making API calls.
+            If not provided, default credentials will be used.
+
+    Returns:
+        Google Cloud Storage URI of the directory for pipeline output artifacts.
+    """
+    project = project or initializer.global_config.project
+    location = location or initializer.global_config.location
+    credentials = credentials or initializer.global_config.credentials
+
+    output_artifacts_gcs_dir = (
+        output_artifacts_gcs_dir
+        or generate_gcs_directory_for_pipeline_artifacts(
+            project=project,
+            location=location,
+        )
+    )
+
+    # Creating the bucket if needed
+    storage_client = storage.Client(
+        project=project,
+        credentials=credentials,
+    )
+
+    pipelines_bucket = storage.Blob.from_string(
+        uri=output_artifacts_gcs_dir,
+        client=storage_client,
+    ).bucket
+
+    if not pipelines_bucket.exists():
+        _logger.info(
+            f'Creating GCS bucket for Vertex Pipelines: "{pipelines_bucket.name}"'
+        )
+        pipelines_bucket = storage_client.create_bucket(
+            bucket_or_name=pipelines_bucket,
+            project=project,
+            location=location,
+        )
+        # Giving the service account read and write access to the new bucket.
+        # Workaround for error: "Failed to create pipeline job. Error: Service account
+        # `xxxxxxxx-compute@developer.gserviceaccount.com` does not have
+        # `[storage.objects.get, storage.objects.create]` IAM permission(s) to the bucket
+        # `xxxxxxxx-vertex-pipelines-us-central1`. Please either copy the files to the
+        # Google Cloud Storage bucket owned by your project, or grant the required
+        # IAM permission(s) to the service account."
+        if not service_account:
+            # Getting the project number to build the default service account email
+            project_number = resource_manager_utils.get_project_number(project)
+            service_account = f"{project_number}-compute@developer.gserviceaccount.com"
+        bucket_iam_policy = pipelines_bucket.get_iam_policy()
+        bucket_iam_policy.setdefault("roles/storage.objectCreator", set()).add(
+            f"serviceAccount:{service_account}"
+        )
+        bucket_iam_policy.setdefault("roles/storage.objectViewer", set()).add(
+            f"serviceAccount:{service_account}"
+        )
+        pipelines_bucket.set_iam_policy(bucket_iam_policy)
+    return output_artifacts_gcs_dir
```
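Taken together, the two helpers above resolve a deterministic artifacts location and ensure the backing bucket exists with the required permissions. A minimal usage sketch follows, assuming the helpers live in a `gcs_utils` module under `google.cloud.aiplatform.utils` (the file name is not visible on this page, so the module path is an assumption); project and location values are placeholders.

```python
from google.cloud.aiplatform.utils import gcs_utils  # assumed module path
from google.cloud.aiplatform.utils import resource_manager_utils

# Resolve the default artifacts directory and create the bucket if needed.
artifacts_dir = gcs_utils.create_gcs_bucket_for_pipeline_artifacts_if_it_does_not_exist(
    project="my-project",    # placeholder
    location="us-central1",  # placeholder
)
print(artifacts_dir)  # gs://my-project-vertex-pipelines-us-central1/output_artifacts/

# When no service account is given, access is granted to the Compute Engine
# default service account, derived from the project number:
project_number = resource_manager_utils.get_project_number("my-project")
print(f"{project_number}-compute@developer.gserviceaccount.com")
```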
```diff
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import pytest
+
+from google.cloud import aiplatform
+from tests.system.aiplatform import e2e_base
+
+
+@pytest.mark.usefixtures("tear_down_resources")
+class TestPipelineJob(e2e_base.TestEndToEnd):
+
+    _temp_prefix = "tmpvrtxsdk-e2e"
+
+    def test_add_pipeline_job_to_experiment(self, shared_state):
+        from kfp import components
+
+        # Components:
+        def train(
+            number_of_epochs: int,
+            learning_rate: float,
+        ):
+            print(f"number_of_epochs={number_of_epochs}")
+            print(f"learning_rate={learning_rate}")
+
+        train_op = components.create_component_from_func(train)
+
+        # Pipeline:
+        def training_pipeline(number_of_epochs: int = 10):
+            train_op(
+                number_of_epochs=number_of_epochs,
+                learning_rate="0.1",
+            )
+
+        # Submitting the pipeline:
+        aiplatform.init(
+            project=e2e_base._PROJECT,
+            location=e2e_base._LOCATION,
+        )
+        job = aiplatform.PipelineJob.from_pipeline_func(
+            pipeline_func=training_pipeline,
+        )
+        job.submit()
+
+        shared_state.setdefault("resources", []).append(job)
+
+        job.wait()
```
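For readers who want to try the flow outside the pytest harness, here is the same sequence as a standalone sketch; the project and location are placeholders, and everything else mirrors the system test above. Note that `submit()` returns once the job is created, while `wait()` blocks until the run reaches a terminal state, which is why the test can register the job for teardown between the two calls.

```python
from google.cloud import aiplatform
from kfp import components


def train(number_of_epochs: int, learning_rate: float):
    print(f"number_of_epochs={number_of_epochs}")
    print(f"learning_rate={learning_rate}")


train_op = components.create_component_from_func(train)


def training_pipeline(number_of_epochs: int = 10):
    train_op(
        number_of_epochs=number_of_epochs,
        learning_rate="0.1",
    )


aiplatform.init(project="my-project", location="us-central1")  # placeholders

job = aiplatform.PipelineJob.from_pipeline_func(pipeline_func=training_pipeline)
job.submit()  # non-blocking: the run continues on Vertex AI
job.wait()    # blocks until the pipeline run completes
```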