From 486400bb868dcf072b39e33422994f1c33117084 Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Tue, 24 May 2022 15:34:08 -0400 Subject: [PATCH] clean up resources --- .../system/aiplatform/test_e2e_forecasting.py | 150 +++++++++--------- 1 file changed, 79 insertions(+), 71 deletions(-) diff --git a/tests/system/aiplatform/test_e2e_forecasting.py b/tests/system/aiplatform/test_e2e_forecasting.py index 21b7249b7e..f1e279d8af 100644 --- a/tests/system/aiplatform/test_e2e_forecasting.py +++ b/tests/system/aiplatform/test_e2e_forecasting.py @@ -38,79 +38,87 @@ class TestEndToEndForecasting(e2e_base.TestEndToEnd): def test_end_to_end_forecasting(self, shared_state): """Builds a dataset, trains models, and gets batch predictions.""" - # Collection of resources generated by this test, to be deleted during - # teardown. - shared_state["resources"] = [] - aiplatform.init( project=e2e_base._PROJECT, location=e2e_base._LOCATION, staging_bucket=shared_state["staging_bucket_name"], ) - - # Create and import to single managed dataset for both training jobs. - ds = aiplatform.TimeSeriesDataset.create( - display_name=self._make_display_name("dataset"), - bq_source=[_TRAINING_DATASET_BQ_PATH], - sync=False, - create_request_timeout=180.0, - ) - shared_state["resources"].extend([ds]) - - time_column = "date" - time_series_identifier_column = "store_name" - target_column = "sale_dollars" - column_specs = { - time_column: "timestamp", - target_column: "numeric", - "city": "categorical", - "zip_code": "categorical", - "county": "categorical", - } - - # Define both training jobs - # TODO(humichael): Add seq2seq job. - automl_job = aiplatform.AutoMLForecastingTrainingJob( - display_name=self._make_display_name("train-housing-automl"), - optimization_objective="minimize-rmse", - column_specs=column_specs, - ) - - # Kick off both training jobs, AutoML job will take approx one hour to - # run. 
- automl_model = automl_job.run( - dataset=ds, - target_column=target_column, - time_column=time_column, - time_series_identifier_column=time_series_identifier_column, - available_at_forecast_columns=[time_column], - unavailable_at_forecast_columns=[target_column], - time_series_attribute_columns=["city", "zip_code", "county"], - forecast_horizon=30, - context_window=30, - data_granularity_unit="day", - data_granularity_count=1, - budget_milli_node_hours=1000, - model_display_name=self._make_display_name("automl-liquor-model"), - sync=False, - ) - shared_state["resources"].extend([automl_job, automl_model]) - - automl_batch_prediction_job = automl_model.batch_predict( - job_display_name=self._make_display_name("automl-liquor-model"), - instances_format="bigquery", - machine_type="n1-standard-4", - bigquery_source=_PREDICTION_DATASET_BQ_PATH, - gcs_destination_prefix=( - f'gs://{shared_state["staging_bucket_name"]}/bp_results/' - ), - sync=False, - ) - shared_state["resources"].append(automl_batch_prediction_job) - - automl_batch_prediction_job.wait() - - assert automl_job.state == pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED - assert ( - automl_batch_prediction_job.state == job_state.JobState.JOB_STATE_SUCCEEDED - ) + try: + # Create and import to single managed dataset for both training + # jobs. + ds = aiplatform.TimeSeriesDataset.create( + display_name=self._make_display_name("dataset"), + bq_source=[_TRAINING_DATASET_BQ_PATH], + sync=False, + create_request_timeout=180.0, + ) + + time_column = "date" + time_series_identifier_column = "store_name" + target_column = "sale_dollars" + column_specs = { + time_column: "timestamp", + target_column: "numeric", + "city": "categorical", + "zip_code": "categorical", + "county": "categorical", + } + + # Define both training jobs + # TODO(humichael): Add seq2seq job. 
+            automl_job = aiplatform.AutoMLForecastingTrainingJob(
+                display_name=self._make_display_name("train-housing-automl"),
+                optimization_objective="minimize-rmse",
+                column_specs=column_specs,
+            )
+
+            # Kick off both training jobs, AutoML job will take approx one hour
+            # to run.
+            automl_model = automl_job.run(
+                dataset=ds,
+                target_column=target_column,
+                time_column=time_column,
+                time_series_identifier_column=time_series_identifier_column,
+                available_at_forecast_columns=[time_column],
+                unavailable_at_forecast_columns=[target_column],
+                time_series_attribute_columns=["city", "zip_code", "county"],
+                forecast_horizon=30,
+                context_window=30,
+                data_granularity_unit="day",
+                data_granularity_count=1,
+                budget_milli_node_hours=1000,
+                model_display_name=self._make_display_name("automl-liquor-model"),
+                sync=False,
+            )
+
+            automl_batch_prediction_job = automl_model.batch_predict(
+                job_display_name=self._make_display_name("automl-liquor-model"),
+                instances_format="bigquery",
+                machine_type="n1-standard-4",
+                bigquery_source=_PREDICTION_DATASET_BQ_PATH,
+                gcs_destination_prefix=(
+                    f'gs://{shared_state["staging_bucket_name"]}/bp_results/'
+                ),
+                sync=False,
+            )
+
+            automl_batch_prediction_job.wait()
+
+            assert (
+                automl_job.state
+                == pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED
+            )
+            assert (
+                automl_batch_prediction_job.state
+                == job_state.JobState.JOB_STATE_SUCCEEDED
+            )
+        finally:
+            # Clean up best-effort by *name*: a name is only bound if the test
+            # progressed far enough to create that resource, so guard with a
+            # locals() membership check before dereferencing.
+            resources = [
+                "ds",
+                "automl_job",
+                "automl_model",
+                "automl_batch_prediction_job",
+            ]
+            for resource in resources:
+                if resource in locals():
+                    # Dereference the name to the actual resource object;
+                    # calling .delete() on the string itself would raise
+                    # AttributeError and mask the test outcome.
+                    locals()[resource].delete()