Skip to content

Commit

Permalink
clean up resources
Browse files Browse the repository at this point in the history
  • Loading branch information
TheMichaelHu committed May 24, 2022
1 parent c1161fa commit 486400b
Showing 1 changed file with 79 additions and 71 deletions.
150 changes: 79 additions & 71 deletions tests/system/aiplatform/test_e2e_forecasting.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,79 +38,87 @@ class TestEndToEndForecasting(e2e_base.TestEndToEnd):
def test_end_to_end_forecasting(self, shared_state):
    """Builds a dataset, trains a forecasting model, and gets batch predictions.

    Every resource created along the way is deleted in the ``finally``
    block, even when an earlier step raised and later variables were
    never assigned.

    Args:
        shared_state: Dict shared across the test session; supplies the
            staging bucket name used for batch-prediction output.
    """
    aiplatform.init(
        project=e2e_base._PROJECT,
        location=e2e_base._LOCATION,
        staging_bucket=shared_state["staging_bucket_name"],
    )
    try:
        # Create and import a single managed dataset shared by both
        # training jobs.
        ds = aiplatform.TimeSeriesDataset.create(
            display_name=self._make_display_name("dataset"),
            bq_source=[_TRAINING_DATASET_BQ_PATH],
            sync=False,
            create_request_timeout=180.0,
        )

        time_column = "date"
        time_series_identifier_column = "store_name"
        target_column = "sale_dollars"
        column_specs = {
            time_column: "timestamp",
            target_column: "numeric",
            "city": "categorical",
            "zip_code": "categorical",
            "county": "categorical",
        }

        # Define both training jobs
        # TODO(humichael): Add seq2seq job.
        automl_job = aiplatform.AutoMLForecastingTrainingJob(
            display_name=self._make_display_name("train-housing-automl"),
            optimization_objective="minimize-rmse",
            column_specs=column_specs,
        )

        # Kick off both training jobs, AutoML job will take approx one hour
        # to run.
        automl_model = automl_job.run(
            dataset=ds,
            target_column=target_column,
            time_column=time_column,
            time_series_identifier_column=time_series_identifier_column,
            available_at_forecast_columns=[time_column],
            unavailable_at_forecast_columns=[target_column],
            time_series_attribute_columns=["city", "zip_code", "county"],
            forecast_horizon=30,
            context_window=30,
            data_granularity_unit="day",
            data_granularity_count=1,
            budget_milli_node_hours=1000,
            model_display_name=self._make_display_name("automl-liquor-model"),
            sync=False,
        )

        automl_batch_prediction_job = automl_model.batch_predict(
            job_display_name=self._make_display_name("automl-liquor-model"),
            instances_format="bigquery",
            machine_type="n1-standard-4",
            bigquery_source=_PREDICTION_DATASET_BQ_PATH,
            gcs_destination_prefix=(
                f'gs://{shared_state["staging_bucket_name"]}/bp_results/'
            ),
            sync=False,
        )

        # Block until the batch prediction (and, transitively, the
        # training it depends on) has finished before asserting on
        # terminal states.
        automl_batch_prediction_job.wait()

        assert (
            automl_job.state
            == pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED
        )
        assert (
            automl_batch_prediction_job.state
            == job_state.JobState.JOB_STATE_SUCCEEDED
        )
    finally:
        # Delete whichever resources were actually created. Look the
        # objects up by name in locals() so a step that raised before
        # assignment is simply skipped, and delete in reverse creation
        # order so dependents go before the dataset they reference.
        # BUG FIX: the original iterated the *name strings* and called
        # ``resource.delete()`` on the string itself, which raises
        # AttributeError and leaks every resource.
        created = locals()
        for name in (
            "automl_batch_prediction_job",
            "automl_model",
            "automl_job",
            "ds",
        ):
            if name in created:
                created[name].delete()

0 comments on commit 486400b

Please sign in to comment.