From 50802889b8a44c92ca889ae749f1024c0c4fa9dc Mon Sep 17 00:00:00 2001 From: Sina Chavoshi <chavoshi@google.com> Date: Thu, 14 Apr 2022 10:18:24 -0700 Subject: [PATCH 1/7] feat(components/google-cloud): Add support for env variables in Custom_Job component. PiperOrigin-RevId: 441793614 --- .../v1/custom_job/utils.py | 5 ++ .../custom_job/unit/test_custom_job_utils.py | 52 +++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py index e31965a248f9..301d0f405c75 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py @@ -186,6 +186,11 @@ def _is_output_parameter(output_key: str) -> bool: container_command_copy[idx] = _EXECUTOR_PLACE_HOLDER_REPLACEMENT worker_pool_spec['container_spec']['command'] = container_command_copy + if component_spec.component_spec.implementation.container.env: + worker_pool_spec['container_spec'][ + 'env'] = component_spec.component_spec.implementation.container.env.copy( + ) + if component_spec.component_spec.implementation.container.args: container_args_copy = component_spec.component_spec.implementation.container.args.copy( ) diff --git a/components/google-cloud/tests/v1/custom_job/unit/test_custom_job_utils.py b/components/google-cloud/tests/v1/custom_job/unit/test_custom_job_utils.py index c607bfa4399f..a14211844bb5 100644 --- a/components/google-cloud/tests/v1/custom_job/unit/test_custom_job_utils.py +++ b/components/google-cloud/tests/v1/custom_job/unit/test_custom_job_utils.py @@ -690,3 +690,55 @@ def test_run_as_vertex_ai_custom_with_nfs_mount(self): self.assertDictContainsSubset( subset=expected_sub_results, dictionary=custom_job_spec.component_spec.to_dict()) + + def test_run_as_vertex_ai_custom_with_environment_variable(self): + 
component_factory_function = self._create_a_container_based_component() + component_factory_function.component_spec.implementation.container.env = [ + 'test_env_variable' + ] + + expected_sub_results = { + 'implementation': { + 'container': { + 'image': + 'test_launcher_image', + 'command': [ + 'python3', '-u', '-m', + 'google_cloud_pipeline_components.container.v1.gcp_launcher.launcher' + ], + 'args': [ + '--type', 'CustomJob', '--payload', + '{"display_name": "ContainerComponent", "job_spec": ' + '{"worker_pool_specs": [{"machine_spec": {"machine_type": ' + '"n1-standard-4"}, "replica_count": 1, "container_spec": ' + '{"image_uri": "google/cloud-sdk:latest", "command": ' + '["sh", "-c", "set -e -x\\necho \\"$0, this is an output ' + 'parameter\\"\\n", ' + '"{{$.inputs.parameters[\'input_text\']}}", ' + '"{{$.outputs.parameters[\'output_value\'].output_file}}"],' + ' "env": ["test_env_variable"]}, "disk_spec": ' + '{"boot_disk_type": "pd-ssd", "boot_disk_size_gb": 100}}],' + ' "service_account": ' + '"{{$.inputs.parameters[\'service_account\']}}", ' + '"network": "{{$.inputs.parameters[\'network\']}}", ' + '"tensorboard": ' + '"{{$.inputs.parameters[\'tensorboard\']}}", ' + '"base_output_directory": {"output_uri_prefix": ' + '"{{$.inputs.parameters[\'base_output_directory\']}}"}}}', + '--project', { + 'inputValue': 'project' + }, '--location', { + 'inputValue': 'location' + }, '--gcp_resources', { + 'outputPath': 'gcp_resources' + } + ] + } + } + } + custom_job_spec = utils.create_custom_training_job_op_from_component( + component_factory_function) + + self.assertDictContainsSubset( + subset=expected_sub_results, + dictionary=custom_job_spec.component_spec.to_dict()) From 7f4e9a40e59008b08092d5de77efbc4e6b85a204 Mon Sep 17 00:00:00 2001 From: Joe Li <56132941+jlyaoyuli@users.noreply.github.com> Date: Thu, 14 Apr 2022 12:13:54 -0700 Subject: [PATCH 2/7] feat(frontend): Support default feature flags update in localStorage. 
Fix #7379 (#7537) * Modified the feature flags setup in localStorage #7379 * Fix redundant for loop and add try catch. * Remove redundant for loop and add try catch to handle unexpected errors. --- frontend/src/features.ts | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/frontend/src/features.ts b/frontend/src/features.ts index d5639d92068a..3f5fe746e42b 100644 --- a/frontend/src/features.ts +++ b/frontend/src/features.ts @@ -21,13 +21,30 @@ declare global { } export function initFeatures() { + let updatedFeatures = features; if (!storageAvailable('localStorage')) { window.__FEATURE_FLAGS__ = JSON.stringify(features); return; } - if (!localStorage.getItem('flags')) { - localStorage.setItem('flags', JSON.stringify(features)); + if (localStorage.getItem('flags')) { + const originalFlags = localStorage.getItem('flags'); + let originalFlagsJSON: Feature[] = []; + try { + originalFlagsJSON = JSON.parse(originalFlags!); + let originalFlagsMap = new Map(originalFlagsJSON.map(features => [features.name, features])); + for (let i = 0; i < updatedFeatures.length; i++) { + const feature = originalFlagsMap.get(updatedFeatures[i].name); + if (feature) { + updatedFeatures[i].active = feature.active; + } + } + } catch (e) { + console.warn( + 'Original feature flags format is null or not recognizable, overwriting with default feature flags.', + ); + } } + localStorage.setItem('flags', JSON.stringify(updatedFeatures)); const flags = localStorage.getItem('flags'); if (flags) { window.__FEATURE_FLAGS__ = flags; From 5a8202e8b2b82eaf69b0d8fd2c4bc33bcd1aae02 Mon Sep 17 00:00:00 2001 From: Michael Hu <humichael@google.com> Date: Thu, 14 Apr 2022 12:59:43 -0700 Subject: [PATCH 3/7] Add e2e tests for ARIMA pipeline and release to GCPC experimental dir. 
PiperOrigin-RevId: 441833115 --- .../automl/forecasting/__init__.py | 0 .../bqml_arima_predict_pipeline.json | 1057 +++++ .../bqml_arima_train_pipeline.json | 3421 +++++++++++++++++ .../experimental/automl/forecasting/utils.py | 150 + ...et_hyperparameter_tuning_job_pipeline.json | 7 +- components/google-cloud/setup.py | 2 +- 6 files changed, 4634 insertions(+), 3 deletions(-) create mode 100644 components/google-cloud/google_cloud_pipeline_components/experimental/automl/forecasting/__init__.py create mode 100644 components/google-cloud/google_cloud_pipeline_components/experimental/automl/forecasting/bqml_arima_predict_pipeline.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/experimental/automl/forecasting/bqml_arima_train_pipeline.json create mode 100644 components/google-cloud/google_cloud_pipeline_components/experimental/automl/forecasting/utils.py diff --git a/components/google-cloud/google_cloud_pipeline_components/experimental/automl/forecasting/__init__.py b/components/google-cloud/google_cloud_pipeline_components/experimental/automl/forecasting/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/components/google-cloud/google_cloud_pipeline_components/experimental/automl/forecasting/bqml_arima_predict_pipeline.json b/components/google-cloud/google_cloud_pipeline_components/experimental/automl/forecasting/bqml_arima_predict_pipeline.json new file mode 100644 index 000000000000..bbc99d5d6910 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/experimental/automl/forecasting/bqml_arima_predict_pipeline.json @@ -0,0 +1,1057 @@ +{ + "pipelineSpec": { + "components": { + "comp-bigquery-create-dataset": { + "executorLabel": "exec-bigquery-create-dataset", + "inputDefinitions": { + "parameters": { + "dataset": { + "type": "STRING" + }, + "exists_ok": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + } + } + }, + 
"outputDefinitions": { + "parameters": { + "dataset_id": { + "type": "STRING" + }, + "project_id": { + "type": "STRING" + } + } + } + }, + "comp-bigquery-create-dataset-2": { + "executorLabel": "exec-bigquery-create-dataset-2", + "inputDefinitions": { + "parameters": { + "dataset": { + "type": "STRING" + }, + "exists_ok": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "dataset_id": { + "type": "STRING" + }, + "project_id": { + "type": "STRING" + } + } + } + }, + "comp-bigquery-delete-dataset-with-prefix": { + "executorLabel": "exec-bigquery-delete-dataset-with-prefix", + "inputDefinitions": { + "parameters": { + "dataset_prefix": { + "type": "STRING" + }, + "delete_contents": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + } + } + } + }, + "comp-bigquery-query-job": { + "executorLabel": "exec-bigquery-query-job", + "inputDefinitions": { + "parameters": { + "job_configuration_query": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "query": { + "type": "STRING" + }, + "query_parameters": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "destination_table": { + "artifactType": { + "schemaTitle": "google.BQTable", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-exit-handler-1": { + "dag": { + "tasks": { + "bigquery-create-dataset": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-bigquery-create-dataset" + }, + "dependentTasks": [ + "generate-iso8601-underscore-datetime-format" + ], + "inputs": { + "parameters": { + "dataset": { + "runtimeValue": { + "constantValue": { + "stringValue": 
"tmp_{{$.inputs.parameters['pipelineparam--generate-iso8601-underscore-datetime-format-Output']}}" + } + } + }, + "exists_ok": { + "runtimeValue": { + "constantValue": { + "stringValue": "False" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--generate-iso8601-underscore-datetime-format-Output": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "generate-iso8601-underscore-datetime-format" + } + }, + "project": { + "componentInputParameter": "pipelineparam--project" + } + } + }, + "taskInfo": { + "name": "create-tmp-dataset" + } + }, + "bigquery-create-dataset-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-bigquery-create-dataset-2" + }, + "dependentTasks": [ + "maybe-replace-with-default" + ], + "inputs": { + "parameters": { + "dataset": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "maybe-replace-with-default" + } + }, + "exists_ok": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + } + } + }, + "taskInfo": { + "name": "create-prediction-dataset" + } + }, + "bigquery-query-job": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-bigquery-query-job" + }, + "dependentTasks": [ + "bigquery-create-dataset-2", + "get-first-valid", + "get-model-metadata" + ], + "inputs": { + "parameters": { + "job_configuration_query": { + "runtimeValue": { + "constantValue": { + "stringValue": "{\"destinationTable\": {\"projectId\": \"{{$.inputs.parameters['pipelineparam--bigquery-create-dataset-2-project_id']}}\", \"datasetId\": \"{{$.inputs.parameters['pipelineparam--bigquery-create-dataset-2-dataset_id']}}\", \"tableId\": \"predictions\"}, \"writeDisposition\": \"WRITE_EMPTY\"}" + } + } + }, + "labels": { + 
"runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--bigquery-create-dataset-2-dataset_id": { + "taskOutputParameter": { + "outputParameterKey": "dataset_id", + "producerTask": "bigquery-create-dataset-2" + } + }, + "pipelineparam--bigquery-create-dataset-2-project_id": { + "taskOutputParameter": { + "outputParameterKey": "project_id", + "producerTask": "bigquery-create-dataset-2" + } + }, + "pipelineparam--get-first-valid-Output": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "get-first-valid" + } + }, + "pipelineparam--get-model-metadata-forecast_horizon": { + "taskOutputParameter": { + "outputParameterKey": "forecast_horizon", + "producerTask": "get-model-metadata" + } + }, + "pipelineparam--get-model-metadata-target_column_name": { + "taskOutputParameter": { + "outputParameterKey": "target_column_name", + "producerTask": "get-model-metadata" + } + }, + "pipelineparam--get-model-metadata-time_column": { + "taskOutputParameter": { + "outputParameterKey": "time_column", + "producerTask": "get-model-metadata" + } + }, + "pipelineparam--get-model-metadata-time_series_identifier_column": { + "taskOutputParameter": { + "outputParameterKey": "time_series_identifier_column", + "producerTask": "get-model-metadata" + } + }, + "pipelineparam--model_name": { + "componentInputParameter": "pipelineparam--model_name" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "query": { + "runtimeValue": { + "constantValue": { + "stringValue": "\n SELECT\n target.*,\n STRUCT(prediction.time_series_adjusted_data AS value)\n AS predicted_{{$.inputs.parameters['pipelineparam--get-model-metadata-target_column_name']}},\n prediction.* EXCEPT (\n {{$.inputs.parameters['pipelineparam--get-model-metadata-time_series_identifier_column']}},\n time_series_timestamp,\n time_series_adjusted_data\n ),\n FROM\n 
ML.EXPLAIN_FORECAST(\n MODEL `{{$.inputs.parameters['pipelineparam--model_name']}}`,\n STRUCT({{$.inputs.parameters['pipelineparam--get-model-metadata-forecast_horizon']}} AS horizon)) AS prediction\n RIGHT JOIN `{{$.inputs.parameters['pipelineparam--get-first-valid-Output']}}` AS target\n ON\n CAST(target.{{$.inputs.parameters['pipelineparam--get-model-metadata-time_series_identifier_column']}} AS STRING)\n = prediction.{{$.inputs.parameters['pipelineparam--get-model-metadata-time_series_identifier_column']}}\n AND TIMESTAMP(target.{{$.inputs.parameters['pipelineparam--get-model-metadata-time_column']}}) = prediction.time_series_timestamp\n WHERE target.{{$.inputs.parameters['pipelineparam--get-model-metadata-target_column_name']}} IS NULL\n " + } + } + }, + "query_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "[]" + } + } + } + } + }, + "taskInfo": { + "name": "predictions-table" + } + }, + "generate-iso8601-underscore-datetime-format": { + "cachingOptions": {}, + "componentRef": { + "name": "comp-generate-iso8601-underscore-datetime-format" + }, + "dependentTasks": [ + "validate-inputs" + ], + "inputs": { + "parameters": { + "run_id": { + "runtimeValue": { + "constantValue": { + "stringValue": "{{$.pipeline_job_uuid}}" + } + } + } + } + }, + "taskInfo": { + "name": "generate-iso8601-underscore-datetime-format" + } + }, + "get-first-valid": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-get-first-valid" + }, + "dependentTasks": [ + "load-table-from-uri", + "parse-data-source" + ], + "inputs": { + "parameters": { + "pipelineparam--load-table-from-uri-Output": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "load-table-from-uri" + } + }, + "pipelineparam--parse-data-source-bq_source": { + "taskOutputParameter": { + "outputParameterKey": "bq_source", + "producerTask": "parse-data-source" + } + }, + "values": { + "runtimeValue": { + "constantValue": { + "stringValue": 
"[\"{{$.inputs.parameters['pipelineparam--parse-data-source-bq_source']}}\", \"{{$.inputs.parameters['pipelineparam--load-table-from-uri-Output']}}\"]" + } + } + } + } + }, + "taskInfo": { + "name": "get-first-valid" + } + }, + "get-model-metadata": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-get-model-metadata" + }, + "dependentTasks": [ + "validate-inputs" + ], + "inputs": { + "parameters": { + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "model": { + "componentInputParameter": "pipelineparam--model_name" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + } + } + }, + "taskInfo": { + "name": "get-model-metadata" + } + }, + "load-table-from-uri": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-load-table-from-uri" + }, + "dependentTasks": [ + "bigquery-create-dataset", + "parse-data-source" + ], + "inputs": { + "parameters": { + "destination": { + "runtimeValue": { + "constantValue": { + "stringValue": "{{$.inputs.parameters['pipelineparam--bigquery-create-dataset-project_id']}}.{{$.inputs.parameters['pipelineparam--bigquery-create-dataset-dataset_id']}}.csv_export" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--bigquery-create-dataset-dataset_id": { + "taskOutputParameter": { + "outputParameterKey": "dataset_id", + "producerTask": "bigquery-create-dataset" + } + }, + "pipelineparam--bigquery-create-dataset-project_id": { + "taskOutputParameter": { + "outputParameterKey": "project_id", + "producerTask": "bigquery-create-dataset" + } + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "source_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "CSV" + } + } + }, + "source_uris": { + "taskOutputParameter": { + "outputParameterKey": "gcs_source", + "producerTask": "parse-data-source" + } + } + } + }, + "taskInfo": { + "name": 
"load-table-from-uri" + } + }, + "maybe-replace-with-default": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-maybe-replace-with-default" + }, + "dependentTasks": [ + "generate-iso8601-underscore-datetime-format" + ], + "inputs": { + "parameters": { + "default": { + "runtimeValue": { + "constantValue": { + "stringValue": "prediction_{{$.inputs.parameters['pipelineparam--generate-iso8601-underscore-datetime-format-Output']}}" + } + } + }, + "pipelineparam--generate-iso8601-underscore-datetime-format-Output": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "generate-iso8601-underscore-datetime-format" + } + }, + "value": { + "componentInputParameter": "pipelineparam--bigquery_destination_uri" + } + } + }, + "taskInfo": { + "name": "maybe-replace-with-default" + } + }, + "parse-data-source": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-parse-data-source" + }, + "inputs": { + "parameters": { + "data_source": { + "componentInputParameter": "pipelineparam--data_source" + } + } + }, + "taskInfo": { + "name": "parse-data-source" + } + }, + "validate-inputs": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-validate-inputs" + }, + "dependentTasks": [ + "parse-data-source" + ], + "inputs": { + "parameters": { + "bigquery_destination_uri": { + "componentInputParameter": "pipelineparam--bigquery_destination_uri" + }, + "bq_source": { + "taskOutputParameter": { + "outputParameterKey": "bq_source", + "producerTask": "parse-data-source" + } + }, + "gcs_source": { + "taskOutputParameter": { + "outputParameterKey": "gcs_source", + "producerTask": "parse-data-source" + } + }, + "source_model_uri": { + "componentInputParameter": "pipelineparam--model_name" + } + } + }, + "taskInfo": { + "name": "validate-inputs" + } + } + } + }, + "inputDefinitions": { + "parameters": { + "pipelineparam--bigquery_destination_uri": { + "type": 
"STRING" + }, + "pipelineparam--data_source": { + "type": "STRING" + }, + "pipelineparam--location": { + "type": "STRING" + }, + "pipelineparam--model_name": { + "type": "STRING" + }, + "pipelineparam--project": { + "type": "STRING" + } + } + } + }, + "comp-generate-iso8601-underscore-datetime-format": { + "executorLabel": "exec-generate-iso8601-underscore-datetime-format", + "inputDefinitions": { + "parameters": { + "run_id": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "Output": { + "type": "STRING" + } + } + } + }, + "comp-get-first-valid": { + "executorLabel": "exec-get-first-valid", + "inputDefinitions": { + "parameters": { + "values": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "Output": { + "type": "STRING" + } + } + } + }, + "comp-get-model-metadata": { + "executorLabel": "exec-get-model-metadata", + "inputDefinitions": { + "parameters": { + "location": { + "type": "STRING" + }, + "model": { + "type": "STRING" + }, + "project": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "forecast_horizon": { + "type": "INT" + }, + "target_column_name": { + "type": "STRING" + }, + "time_column": { + "type": "STRING" + }, + "time_series_identifier_column": { + "type": "STRING" + } + } + } + }, + "comp-load-table-from-uri": { + "executorLabel": "exec-load-table-from-uri", + "inputDefinitions": { + "parameters": { + "destination": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "source_format": { + "type": "STRING" + }, + "source_uris": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "Output": { + "type": "STRING" + } + } + } + }, + "comp-maybe-replace-with-default": { + "executorLabel": "exec-maybe-replace-with-default", + "inputDefinitions": { + "parameters": { + "default": { + "type": "STRING" + }, + "value": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": 
{ + "Output": { + "type": "STRING" + } + } + } + }, + "comp-parse-data-source": { + "executorLabel": "exec-parse-data-source", + "inputDefinitions": { + "parameters": { + "data_source": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "bq_source": { + "type": "STRING" + }, + "gcs_source": { + "type": "STRING" + } + } + } + }, + "comp-validate-inputs": { + "executorLabel": "exec-validate-inputs", + "inputDefinitions": { + "parameters": { + "bigquery_destination_uri": { + "type": "STRING" + }, + "bq_source": { + "type": "STRING" + }, + "gcs_source": { + "type": "STRING" + }, + "source_model_uri": { + "type": "STRING" + } + } + } + } + }, + "deploymentSpec": { + "executors": { + "exec-bigquery-create-dataset": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "bigquery_create_dataset" + ], + "command": [ + "sh", + "-c", + "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.20.0' 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef bigquery_create_dataset(\n project: str,\n location: str,\n dataset: str,\n exists_ok: bool = False,\n) -> NamedTuple('Outputs', [('project_id', str), ('dataset_id', str)]):\n \"\"\"Creates a BigQuery dataset.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n import collections\n\n from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n client = bigquery.Client(project=project, location=location)\n ref = 
client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n ref.project, ref.dataset_id)\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-bigquery-create-dataset-2": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "bigquery_create_dataset" + ], + "command": [ + "sh", + "-c", + "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.20.0' 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef bigquery_create_dataset(\n project: str,\n location: str,\n dataset: str,\n exists_ok: bool = False,\n) -> NamedTuple('Outputs', [('project_id', str), ('dataset_id', str)]):\n \"\"\"Creates a BigQuery dataset.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n import collections\n\n from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n client = bigquery.Client(project=project, location=location)\n ref = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n ref.project, ref.dataset_id)\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-bigquery-delete-dataset-with-prefix": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "bigquery_delete_dataset_with_prefix" + ], + "command": [ + "sh", + "-c", + "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.20.0' 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef bigquery_delete_dataset_with_prefix(\n project: str,\n location: str,\n dataset_prefix: str,\n delete_contents: bool = False,\n) -> None:\n \"\"\"Deletes all BigQuery datasets matching the given prefix.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n client = bigquery.Client(project=project, location=location)\n for dataset in client.list_datasets(project=project):\n if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-bigquery-query-job": { + "container": { + "args": [ + "--type", + "BigqueryQueryJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--payload", + "{\"configuration\": {\"query\": {{$.inputs.parameters['job_configuration_query']}}, \"labels\": {{$.inputs.parameters['labels']}}}}", + "--job_configuration_query_override", + "{\"query\": \"{{$.inputs.parameters['query']}}\", \"query_parameters\": {{$.inputs.parameters['query_parameters']}}, \"destination_encryption_configuration\": {\"kmsKeyName\": \"\"}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + 
"command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:0.3.0" + } + }, + "exec-generate-iso8601-underscore-datetime-format": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "generate_iso8601_underscore_datetime_format" + ], + "command": [ + "sh", + "-c", + "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef generate_iso8601_underscore_datetime_format(run_id: str) -> str:\n \"\"\"Creates a timestamp using the same logic as Vertex Forecasting.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n import datetime\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n timestamp = datetime.datetime.now().strftime('%Y_%m_%dT%H_%M_%S_%f')[:23]\n return f'{run_id}_{timestamp}Z'\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-get-first-valid": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "get_first_valid" + ], + "command": [ + "sh", + "-c", + "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef get_first_valid(values: str) -> str:\n \"\"\"Returns the first truthy value from the given serialized JSON list.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n for value in json.loads(values):\n if value:\n return value\n raise ValueError('No valid values.')\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-get-model-metadata": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "get_model_metadata" + ], + "command": [ + "sh", + "-c", + "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.20.0' 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef get_model_metadata(\n project: str,\n location: str,\n model: str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('time_column', str),\n ('time_series_identifier_column', str),\n ('target_column_name', str),\n ('forecast_horizon', int),\n ],\n):\n \"\"\"Retrieves training options for a BQML model.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n import collections\n\n from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n client = bigquery.Client(project=project, location=location)\n options = client.get_model(model).training_runs[0].training_options\n return collections.namedtuple(\n 'Outputs', [\n 'time_column',\n 'time_series_identifier_column',\n 'target_column_name',\n 'forecast_horizon',\n ],\n )(\n options.time_series_timestamp_column,\n options.time_series_id_column,\n options.time_series_data_column,\n options.horizon,\n )\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-load-table-from-uri": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "load_table_from_uri" + ], + "command": [ + "sh", + "-c", + "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.20.0' 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef load_table_from_uri(\n project: str,\n location: str,\n source_uris: List[str],\n destination: str,\n source_format: str = 'CSV',\n) -> str:\n \"\"\"Creates a table from a list of URIs.\n\n Args:\n project: The GCP project.\n location: The GCP region.\n source_uris: URIs of data files to be loaded; in format\n gs://<bucket_name>/<object_name_or_glob>.\n destination: Table into which data is to be loaded.\n source_format: The file format for the files being imported. Only CSV is\n supported.\n\n Returns:\n The destination table containing imported data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n if not source_uris:\n return ''\n\n client = bigquery.Client(project=project, location=location)\n job_config = bigquery.LoadJobConfig(\n autodetect=True, source_format=source_format)\n client.load_table_from_uri(\n source_uris=source_uris,\n destination=destination,\n project=project,\n location=location,\n job_config=job_config).result()\n return destination\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-maybe-replace-with-default": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "maybe_replace_with_default" + ], + "command": [ + "sh", + "-c", + "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef maybe_replace_with_default(value: str, default: str = '') -> str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\n return default if not value or value == '-' else value\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-parse-data-source": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "parse_data_source" + ], + "command": [ + "sh", + "-c", + "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef parse_data_source(\n data_source: Dict[str, Dict[str, Union[str, List[str]]]],\n) -> NamedTuple('Outputs', [\n ('bq_source', str),\n ('gcs_source', List[str]),\n]):\n \"\"\"Converts the data source JSON into flat arguments.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n import collections\n import re\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n result = {}\n if 
'big_query_data_source' in data_source:\n result['bq_source'] = re.sub(\n '^bq://',\n '',\n data_source['big_query_data_source']['big_query_table_path'])\n if 'csv_data_source' in data_source:\n result['gcs_source'] = data_source['csv_data_source']['csv_filenames']\n return collections.namedtuple('Outputs', ['bq_source', 'gcs_source'])(\n result.get('bq_source', ''), result.get('gcs_source', []))\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-validate-inputs": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "validate_inputs" + ], + "command": [ + "sh", + "-c", + "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef validate_inputs(\n time_column: Optional[str] = None,\n time_series_identifier_column: Optional[str] = None,\n target_column_name: Optional[str] = None,\n bq_source: Optional[str] = None,\n training_fraction_split: Optional[float] = None,\n validation_fraction_split: Optional[float] = None,\n test_fraction_split: Optional[float] = None,\n predefined_split_column: Optional[str] = None,\n gcs_source: Optional[List[str]] = None,\n source_model_uri: Optional[str] = None,\n bigquery_destination_uri: Optional[str] = None,\n) -> None:\n \"\"\"Checks training pipeline input parameters are valid.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n import re\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n project_pattern = 
r'([a-z0-9.-]+:)?[a-z][a-z0-9-_]{4,28}[a-z0-9]'\n dataset_pattern = r'.[a-zA-Z0-9_]+'\n table_pattern = r'.[^\\.\\:`]+'\n\n # Validate BigQuery column and dataset names.\n bigquery_column_parameters = [\n time_column,\n time_series_identifier_column,\n target_column_name,\n ]\n column_pattern = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]{1,300}')\n for column in bigquery_column_parameters:\n if column is not None and not column_pattern.fullmatch(column):\n raise ValueError(f'Invalid column name: {column}.')\n dataset_uri_pattern = re.compile(project_pattern + dataset_pattern)\n if (\n bigquery_destination_uri != '-'\n and bigquery_destination_uri is not None\n and not dataset_uri_pattern.fullmatch(bigquery_destination_uri)\n ):\n raise ValueError(\n f'Invalid BigQuery dataset URI: {bigquery_destination_uri}.')\n table_uri_pattern = re.compile(\n project_pattern + dataset_pattern + table_pattern)\n if (\n source_model_uri is not None\n and not table_uri_pattern.fullmatch(source_model_uri)\n ):\n raise ValueError(f'Invalid BigQuery table URI: {source_model_uri}.')\n\n # Validate data source.\n data_source_count = sum([bool(source) for source in [bq_source, gcs_source]])\n if data_source_count > 1:\n raise ValueError(f'Expected 1 data source, found {data_source_count}.')\n if bq_source and not table_uri_pattern.fullmatch(bq_source):\n raise ValueError(f'Invalid BigQuery table URI: {bq_source}.')\n gcs_path_pattern = re.compile(r'gs:\\/\\/(.+)\\/([^\\/]+)')\n if gcs_source:\n for gcs_path in gcs_source:\n if not gcs_path_pattern.fullmatch(gcs_path):\n raise ValueError(f'Invalid path to CSV stored in GCS: {gcs_path}.')\n\n # Validate split spec.\n fraction_splits = [\n training_fraction_split,\n validation_fraction_split,\n test_fraction_split,\n ]\n split_count = sum(\n [bool(source)\n for source in [predefined_split_column, any(fraction_splits)]])\n if split_count > 1:\n raise ValueError(f'Expected 1 split type, found {split_count}.')\n if (\n predefined_split_column\n and not 
column_pattern.fullmatch(predefined_split_column)\n ):\n raise ValueError(f'Invalid column name: {predefined_split_column}.')\n if any(fraction_splits):\n if not all(fraction_splits):\n raise ValueError(\n f'All fractions must be non-zero. Got: {fraction_splits}.')\n if sum(fraction_splits) != 1:\n raise ValueError(\n f'Fraction splits must sum to 1. Got: {sum(fraction_splits)}.')\n\n" + ], + "image": "python:3.7-slim" + } + } + } + }, + "pipelineInfo": { + "name": "automl-tabular-bqml-arima-prediction" + }, + "root": { + "dag": { + "tasks": { + "bigquery-delete-dataset-with-prefix": { + "componentRef": { + "name": "comp-bigquery-delete-dataset-with-prefix" + }, + "dependentTasks": [ + "exit-handler-1" + ], + "inputs": { + "parameters": { + "dataset_prefix": { + "runtimeValue": { + "constantValue": { + "stringValue": "tmp_{{$.pipeline_job_uuid}}" + } + } + }, + "delete_contents": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "location": { + "componentInputParameter": "location" + }, + "project": { + "componentInputParameter": "project" + } + } + }, + "taskInfo": { + "name": "delete-tmp-dataset" + }, + "triggerPolicy": { + "strategy": "ALL_UPSTREAM_TASKS_COMPLETED" + } + }, + "exit-handler-1": { + "componentRef": { + "name": "comp-exit-handler-1" + }, + "inputs": { + "parameters": { + "pipelineparam--bigquery_destination_uri": { + "componentInputParameter": "bigquery_destination_uri" + }, + "pipelineparam--data_source": { + "componentInputParameter": "data_source" + }, + "pipelineparam--location": { + "componentInputParameter": "location" + }, + "pipelineparam--model_name": { + "componentInputParameter": "model_name" + }, + "pipelineparam--project": { + "componentInputParameter": "project" + } + } + }, + "taskInfo": { + "name": "exit-handler-1" + } + } + } + }, + "inputDefinitions": { + "parameters": { + "bigquery_destination_uri": { + "type": "STRING" + }, + "data_source": { + "type": "STRING" + }, + "generate_explanation": { + "type": 
"STRING" + }, + "location": { + "type": "STRING" + }, + "model_name": { + "type": "STRING" + }, + "project": { + "type": "STRING" + } + } + } + }, + "schemaVersion": "2.0.0", + "sdkVersion": "kfp-1.8.11" + }, + "runtimeConfig": { + "parameters": { + "bigquery_destination_uri": { + "stringValue": "-" + }, + "data_source": { + "stringValue": "{\"big_query_data_source\": {\"big_query_table_path\": \"bq://[PROJECT].[DATASET].[TABLE]\"}}" + }, + "generate_explanation": { + "stringValue": "False" + } + } + } +} \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/experimental/automl/forecasting/bqml_arima_train_pipeline.json b/components/google-cloud/google_cloud_pipeline_components/experimental/automl/forecasting/bqml_arima_train_pipeline.json new file mode 100644 index 000000000000..676b2d03d851 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/experimental/automl/forecasting/bqml_arima_train_pipeline.json @@ -0,0 +1,3421 @@ +{ + "pipelineSpec": { + "components": { + "comp-bigquery-create-dataset": { + "executorLabel": "exec-bigquery-create-dataset", + "inputDefinitions": { + "parameters": { + "dataset": { + "type": "STRING" + }, + "exists_ok": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "dataset_id": { + "type": "STRING" + }, + "project_id": { + "type": "STRING" + } + } + } + }, + "comp-bigquery-create-dataset-2": { + "executorLabel": "exec-bigquery-create-dataset-2", + "inputDefinitions": { + "parameters": { + "dataset": { + "type": "STRING" + }, + "exists_ok": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "dataset_id": { + "type": "STRING" + }, + "project_id": { + "type": "STRING" + } + } + } + }, + "comp-bigquery-create-model-job": { + "executorLabel": 
"exec-bigquery-create-model-job", + "inputDefinitions": { + "parameters": { + "job_configuration_query": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "query": { + "type": "STRING" + }, + "query_parameters": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "model": { + "artifactType": { + "schemaTitle": "google.BQMLModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-bigquery-create-model-job-2": { + "executorLabel": "exec-bigquery-create-model-job-2", + "inputDefinitions": { + "parameters": { + "job_configuration_query": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "query": { + "type": "STRING" + }, + "query_parameters": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "model": { + "artifactType": { + "schemaTitle": "google.BQMLModel", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-bigquery-delete-dataset-with-prefix": { + "executorLabel": "exec-bigquery-delete-dataset-with-prefix", + "inputDefinitions": { + "parameters": { + "dataset_prefix": { + "type": "STRING" + }, + "delete_contents": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + } + } + } + }, + "comp-bigquery-list-rows": { + "executorLabel": "exec-bigquery-list-rows", + "inputDefinitions": { + "artifacts": { + "table": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "Output": { + "type": "STRING" + } + } + } + }, + "comp-bigquery-query-job": { + 
"executorLabel": "exec-bigquery-query-job", + "inputDefinitions": { + "parameters": { + "job_configuration_query": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "query": { + "type": "STRING" + }, + "query_parameters": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "destination_table": { + "artifactType": { + "schemaTitle": "google.BQTable", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-bigquery-query-job-2": { + "executorLabel": "exec-bigquery-query-job-2", + "inputDefinitions": { + "parameters": { + "job_configuration_query": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "query": { + "type": "STRING" + }, + "query_parameters": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "destination_table": { + "artifactType": { + "schemaTitle": "google.BQTable", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-bigquery-query-job-3": { + "executorLabel": "exec-bigquery-query-job-3", + "inputDefinitions": { + "parameters": { + "job_configuration_query": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "query": { + "type": "STRING" + }, + "query_parameters": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "destination_table": { + "artifactType": { + "schemaTitle": "google.BQTable", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-bigquery-query-job-4": { + "executorLabel": "exec-bigquery-query-job-4", + "inputDefinitions": { + "parameters": { + "job_configuration_query": { + 
"type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "query": { + "type": "STRING" + }, + "query_parameters": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "destination_table": { + "artifactType": { + "schemaTitle": "google.BQTable", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-bigquery-query-job-5": { + "executorLabel": "exec-bigquery-query-job-5", + "inputDefinitions": { + "parameters": { + "job_configuration_query": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "query": { + "type": "STRING" + }, + "query_parameters": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "destination_table": { + "artifactType": { + "schemaTitle": "google.BQTable", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-bigquery-query-job-6": { + "executorLabel": "exec-bigquery-query-job-6", + "inputDefinitions": { + "parameters": { + "job_configuration_query": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "query": { + "type": "STRING" + }, + "query_parameters": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "destination_table": { + "artifactType": { + "schemaTitle": "google.BQTable", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-bigquery-query-job-7": { + "executorLabel": "exec-bigquery-query-job-7", + "inputDefinitions": { + "parameters": { + "job_configuration_query": { + "type": "STRING" + }, + "labels": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": 
"STRING" + }, + "query": { + "type": "STRING" + }, + "query_parameters": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "artifacts": { + "destination_table": { + "artifactType": { + "schemaTitle": "google.BQTable", + "schemaVersion": "0.0.1" + } + } + }, + "parameters": { + "gcp_resources": { + "type": "STRING" + } + } + } + }, + "comp-build-serialized-query-parameters": { + "executorLabel": "exec-build-serialized-query-parameters", + "inputDefinitions": { + "parameters": { + "forecast_horizon": { + "type": "INT" + }, + "forecast_horizon_off_by_one": { + "type": "STRING" + }, + "splits": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "Output": { + "type": "STRING" + } + } + } + }, + "comp-build-serialized-query-parameters-2": { + "executorLabel": "exec-build-serialized-query-parameters-2", + "inputDefinitions": { + "parameters": { + "data_granularity_unit": { + "type": "STRING" + }, + "forecast_horizon": { + "type": "INT" + }, + "forecast_horizon_off_by_one": { + "type": "STRING" + }, + "max_order": { + "type": "INT" + }, + "splits": { + "type": "STRING" + }, + "window": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "Output": { + "type": "STRING" + } + } + } + }, + "comp-build-serialized-query-parameters-3": { + "executorLabel": "exec-build-serialized-query-parameters-3", + "inputDefinitions": { + "parameters": { + "data_granularity_unit": { + "type": "STRING" + }, + "forecast_horizon": { + "type": "INT" + }, + "forecast_horizon_off_by_one": { + "type": "STRING" + }, + "max_order": { + "type": "INT" + }, + "splits": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "Output": { + "type": "STRING" + } + } + } + }, + "comp-build-split-query-parameters": { + "executorLabel": "exec-build-split-query-parameters", + "inputDefinitions": { + "parameters": { + "training_fraction_split": { + "type": "DOUBLE" + }, + "validation_fraction_split": { + "type": "DOUBLE" + } + 
} + }, + "outputDefinitions": { + "parameters": { + "Output": { + "type": "STRING" + } + } + } + }, + "comp-copy-table": { + "executorLabel": "exec-copy-table", + "inputDefinitions": { + "parameters": { + "destination": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "source": { + "type": "STRING" + } + } + } + }, + "comp-exit-handler-1": { + "dag": { + "tasks": { + "bigquery-create-dataset": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-bigquery-create-dataset" + }, + "dependentTasks": [ + "generate-iso8601-underscore-datetime-format" + ], + "inputs": { + "parameters": { + "dataset": { + "runtimeValue": { + "constantValue": { + "stringValue": "tmp_{{$.inputs.parameters['pipelineparam--generate-iso8601-underscore-datetime-format-Output']}}" + } + } + }, + "exists_ok": { + "runtimeValue": { + "constantValue": { + "stringValue": "False" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--generate-iso8601-underscore-datetime-format-Output": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "generate-iso8601-underscore-datetime-format" + } + }, + "project": { + "componentInputParameter": "pipelineparam--project" + } + } + }, + "taskInfo": { + "name": "create-tmp-dataset" + } + }, + "bigquery-create-dataset-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-bigquery-create-dataset-2" + }, + "dependentTasks": [ + "maybe-replace-with-default" + ], + "inputs": { + "parameters": { + "dataset": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "maybe-replace-with-default" + } + }, + "exists_ok": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + } + } + }, 
+ "taskInfo": { + "name": "create-export-dataset" + } + }, + "bigquery-create-model-job-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-bigquery-create-model-job-2" + }, + "dependentTasks": [ + "bigquery-create-dataset-2", + "bigquery-query-job-7", + "build-serialized-query-parameters-3", + "maybe-replace-with-default-2", + "table-to-uri" + ], + "inputs": { + "parameters": { + "job_configuration_query": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--bigquery-create-dataset-2-dataset_id": { + "taskOutputParameter": { + "outputParameterKey": "dataset_id", + "producerTask": "bigquery-create-dataset-2" + } + }, + "pipelineparam--bigquery-create-dataset-2-project_id": { + "taskOutputParameter": { + "outputParameterKey": "project_id", + "producerTask": "bigquery-create-dataset-2" + } + }, + "pipelineparam--maybe-replace-with-default-2-Output": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "maybe-replace-with-default-2" + } + }, + "pipelineparam--table-to-uri-uri": { + "taskOutputParameter": { + "outputParameterKey": "uri", + "producerTask": "table-to-uri" + } + }, + "pipelineparam--target_column_name": { + "componentInputParameter": "pipelineparam--target_column_name" + }, + "pipelineparam--time_column": { + "componentInputParameter": "pipelineparam--time_column" + }, + "pipelineparam--time_series_identifier_column": { + "componentInputParameter": "pipelineparam--time_series_identifier_column" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "query": { + "runtimeValue": { + "constantValue": { + "stringValue": "\n CREATE OR REPLACE MODEL 
`{{$.inputs.parameters['pipelineparam--bigquery-create-dataset-2-project_id']}}.{{$.inputs.parameters['pipelineparam--bigquery-create-dataset-2-dataset_id']}}.model`\n OPTIONS (\n model_type = 'ARIMA_PLUS',\n time_series_timestamp_col = '{{$.inputs.parameters['pipelineparam--time_column']}}',\n time_series_id_col = '{{$.inputs.parameters['pipelineparam--time_series_identifier_column']}}',\n time_series_data_col = '{{$.inputs.parameters['pipelineparam--target_column_name']}}',\n horizon = @forecast_horizon,\n auto_arima = True,\n auto_arima_max_order = @max_order,\n data_frequency = @data_granularity_unit,\n holiday_region = 'GLOBAL',\n clean_spikes_and_dips = True,\n adjust_step_changes = True,\n decompose_time_series = True\n ) AS\n SELECT\n {{$.inputs.parameters['pipelineparam--time_series_identifier_column']}},\n {{$.inputs.parameters['pipelineparam--time_column']}},\n {{$.inputs.parameters['pipelineparam--target_column_name']}},\n FROM `{{$.inputs.parameters['pipelineparam--table-to-uri-uri']}}`\n WHERE\n UPPER({{$.inputs.parameters['pipelineparam--maybe-replace-with-default-2-Output']}}) IN UNNEST(@splits)\n AND TIMESTAMP({{$.inputs.parameters['pipelineparam--time_column']}}) < @start_time\n " + } + } + }, + "query_parameters": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "build-serialized-query-parameters-3" + } + } + } + }, + "taskInfo": { + "name": "create-serving-model" + } + }, + "bigquery-list-rows": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-bigquery-list-rows" + }, + "dependentTasks": [ + "bigquery-query-job-2" + ], + "inputs": { + "artifacts": { + "table": { + "taskOutputArtifact": { + "outputArtifactKey": "destination_table", + "producerTask": "bigquery-query-job-2" + } + } + }, + "parameters": { + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + } + } + }, + "taskInfo": { + 
"name": "bigquery-list-rows" + } + }, + "bigquery-query-job": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-bigquery-query-job" + }, + "dependentTasks": [ + "bigquery-create-dataset", + "build-split-query-parameters", + "get-first-valid", + "maybe-replace-with-default-2", + "maybe-replace-with-default-3" + ], + "inputs": { + "parameters": { + "job_configuration_query": { + "runtimeValue": { + "constantValue": { + "stringValue": "{\"destinationTable\": {\"projectId\": \"{{$.inputs.parameters['pipelineparam--bigquery-create-dataset-project_id']}}\", \"datasetId\": \"{{$.inputs.parameters['pipelineparam--bigquery-create-dataset-dataset_id']}}\", \"tableId\": \"data\"}, \"writeDisposition\": \"WRITE_EMPTY\"}" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--bigquery-create-dataset-dataset_id": { + "taskOutputParameter": { + "outputParameterKey": "dataset_id", + "producerTask": "bigquery-create-dataset" + } + }, + "pipelineparam--bigquery-create-dataset-project_id": { + "taskOutputParameter": { + "outputParameterKey": "project_id", + "producerTask": "bigquery-create-dataset" + } + }, + "pipelineparam--get-first-valid-Output": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "get-first-valid" + } + }, + "pipelineparam--maybe-replace-with-default-2-Output": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "maybe-replace-with-default-2" + } + }, + "pipelineparam--maybe-replace-with-default-3-Output": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "maybe-replace-with-default-3" + } + }, + "pipelineparam--target_column_name": { + "componentInputParameter": "pipelineparam--target_column_name" + }, + "pipelineparam--time_column": { + "componentInputParameter": "pipelineparam--time_column" + }, + 
"pipelineparam--time_series_identifier_column": { + "componentInputParameter": "pipelineparam--time_series_identifier_column" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "query": { + "runtimeValue": { + "constantValue": { + "stringValue": "\n WITH\n fractional_splits AS (\n SELECT\n percentiles[OFFSET(@train_percent)] AS train_split,\n percentiles[OFFSET(@validation_percent)] AS validation_split,\n FROM (\n SELECT APPROX_QUANTILES(DISTINCT {{$.inputs.parameters['pipelineparam--time_column']}}, 100) AS percentiles,\n FROM `{{$.inputs.parameters['pipelineparam--get-first-valid-Output']}}`\n )\n ),\n data_with_fractional_splits AS (\n SELECT\n data.*,\n CASE\n WHEN {{$.inputs.parameters['pipelineparam--time_column']}} < train_split THEN 'TRAIN'\n WHEN {{$.inputs.parameters['pipelineparam--time_column']}} < validation_split THEN 'VALIDATE'\n ELSE 'TEST'\n END AS _{{$.inputs.parameters['pipelineparam--maybe-replace-with-default-2-Output']}}\n FROM `{{$.inputs.parameters['pipelineparam--get-first-valid-Output']}}` AS data\n CROSS JOIN fractional_splits\n )\n SELECT\n CAST({{$.inputs.parameters['pipelineparam--time_series_identifier_column']}} AS STRING)\n AS {{$.inputs.parameters['pipelineparam--time_series_identifier_column']}},\n TIMESTAMP({{$.inputs.parameters['pipelineparam--time_column']}}) AS {{$.inputs.parameters['pipelineparam--time_column']}},\n CAST({{$.inputs.parameters['pipelineparam--target_column_name']}} AS FLOAT64) AS {{$.inputs.parameters['pipelineparam--target_column_name']}},\n COALESCE({{$.inputs.parameters['pipelineparam--maybe-replace-with-default-3-Output']}}, _{{$.inputs.parameters['pipelineparam--maybe-replace-with-default-2-Output']}}) AS {{$.inputs.parameters['pipelineparam--maybe-replace-with-default-2-Output']}}\n FROM data_with_fractional_splits\n " + } + } + }, + "query_parameters": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "build-split-query-parameters" + } + } + } 
+ }, + "taskInfo": { + "name": "create-data-table" + } + }, + "bigquery-query-job-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-bigquery-query-job-2" + }, + "dependentTasks": [ + "bigquery-create-dataset", + "build-serialized-query-parameters", + "maybe-replace-with-default-2", + "table-to-uri" + ], + "inputs": { + "parameters": { + "job_configuration_query": { + "runtimeValue": { + "constantValue": { + "stringValue": "{\"destinationTable\": {\"projectId\": \"{{$.inputs.parameters['pipelineparam--bigquery-create-dataset-project_id']}}\", \"datasetId\": \"{{$.inputs.parameters['pipelineparam--bigquery-create-dataset-dataset_id']}}\", \"tableId\": \"windows\"}, \"writeDisposition\": \"WRITE_EMPTY\"}" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--bigquery-create-dataset-dataset_id": { + "taskOutputParameter": { + "outputParameterKey": "dataset_id", + "producerTask": "bigquery-create-dataset" + } + }, + "pipelineparam--bigquery-create-dataset-project_id": { + "taskOutputParameter": { + "outputParameterKey": "project_id", + "producerTask": "bigquery-create-dataset" + } + }, + "pipelineparam--data_granularity_unit": { + "componentInputParameter": "pipelineparam--data_granularity_unit" + }, + "pipelineparam--maybe-replace-with-default-2-Output": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "maybe-replace-with-default-2" + } + }, + "pipelineparam--table-to-uri-uri": { + "taskOutputParameter": { + "outputParameterKey": "uri", + "producerTask": "table-to-uri" + } + }, + "pipelineparam--time_column": { + "componentInputParameter": "pipelineparam--time_column" + }, + "pipelineparam--time_series_identifier_column": { + "componentInputParameter": "pipelineparam--time_series_identifier_column" + }, + "project": { + "componentInputParameter": 
"pipelineparam--project" + }, + "query": { + "runtimeValue": { + "constantValue": { + "stringValue": "\n WITH\n time_series_windows AS (\n SELECT\n MIN({{$.inputs.parameters['pipelineparam--time_column']}}) OVER (horizon) AS start_time,\n COUNT(*) OVER (horizon) AS count,\n FROM `{{$.inputs.parameters['pipelineparam--table-to-uri-uri']}}`\n WHERE UPPER({{$.inputs.parameters['pipelineparam--maybe-replace-with-default-2-Output']}}) IN UNNEST(@splits)\n WINDOW horizon AS (\n PARTITION BY {{$.inputs.parameters['pipelineparam--time_series_identifier_column']}}\n ORDER BY {{$.inputs.parameters['pipelineparam--time_column']}}\n ROWS BETWEEN 0 PRECEDING AND @forecast_horizon FOLLOWING)\n )\n SELECT\n start_time,\n DATETIME_ADD(\n TIMESTAMP(start_time),\n INTERVAL @forecast_horizon {{$.inputs.parameters['pipelineparam--data_granularity_unit']}}\n ) AS end_time,\n SUM(count) AS count,\n FROM time_series_windows\n GROUP BY start_time\n " + } + } + }, + "query_parameters": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "build-serialized-query-parameters" + } + } + } + }, + "taskInfo": { + "name": "create-eval-windows-table" + } + }, + "bigquery-query-job-3": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-bigquery-query-job-3" + }, + "dependentTasks": [ + "bigquery-create-dataset" + ], + "inputs": { + "parameters": { + "job_configuration_query": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--bigquery-create-dataset-dataset_id": { + "taskOutputParameter": { + "outputParameterKey": "dataset_id", + "producerTask": "bigquery-create-dataset" + } + }, + "pipelineparam--bigquery-create-dataset-project_id": { + "taskOutputParameter": { + "outputParameterKey": "project_id", + "producerTask": 
"bigquery-create-dataset" + } + }, + "pipelineparam--time_column": { + "componentInputParameter": "pipelineparam--time_column" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "query": { + "runtimeValue": { + "constantValue": { + "stringValue": "\n CREATE TABLE `{{$.inputs.parameters['pipelineparam--bigquery-create-dataset-project_id']}}.{{$.inputs.parameters['pipelineparam--bigquery-create-dataset-dataset_id']}}.metrics` (\n predicted_on_{{$.inputs.parameters['pipelineparam--time_column']}} TIMESTAMP,\n MAE FLOAT64,\n MSE FLOAT64,\n MAPE FLOAT64,\n prediction_count INT64\n )\n " + } + } + }, + "query_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "[]" + } + } + } + } + }, + "taskInfo": { + "name": "create-tmp-metrics-table" + } + }, + "bigquery-query-job-4": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-bigquery-query-job-4" + }, + "dependentTasks": [ + "bigquery-create-dataset" + ], + "inputs": { + "parameters": { + "job_configuration_query": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--bigquery-create-dataset-dataset_id": { + "taskOutputParameter": { + "outputParameterKey": "dataset_id", + "producerTask": "bigquery-create-dataset" + } + }, + "pipelineparam--bigquery-create-dataset-project_id": { + "taskOutputParameter": { + "outputParameterKey": "project_id", + "producerTask": "bigquery-create-dataset" + } + }, + "pipelineparam--target_column_name": { + "componentInputParameter": "pipelineparam--target_column_name" + }, + "pipelineparam--time_column": { + "componentInputParameter": "pipelineparam--time_column" + }, + "pipelineparam--time_series_identifier_column": { + "componentInputParameter": "pipelineparam--time_series_identifier_column" + }, + 
"project": { + "componentInputParameter": "pipelineparam--project" + }, + "query": { + "runtimeValue": { + "constantValue": { + "stringValue": "\n CREATE TABLE `{{$.inputs.parameters['pipelineparam--bigquery-create-dataset-project_id']}}.{{$.inputs.parameters['pipelineparam--bigquery-create-dataset-dataset_id']}}.evaluated_examples` (\n {{$.inputs.parameters['pipelineparam--time_series_identifier_column']}} STRING,\n {{$.inputs.parameters['pipelineparam--time_column']}} TIMESTAMP,\n predicted_on_{{$.inputs.parameters['pipelineparam--time_column']}} TIMESTAMP,\n {{$.inputs.parameters['pipelineparam--target_column_name']}} FLOAT64,\n predicted_{{$.inputs.parameters['pipelineparam--target_column_name']}} STRUCT<value FLOAT64>\n )\n " + } + } + }, + "query_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "[]" + } + } + } + } + }, + "taskInfo": { + "name": "create-evaluated-examples-table" + } + }, + "bigquery-query-job-7": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-bigquery-query-job-7" + }, + "dependentTasks": [ + "bigquery-create-dataset-2", + "for-loop-2", + "table-to-uri-2" + ], + "inputs": { + "parameters": { + "job_configuration_query": { + "runtimeValue": { + "constantValue": { + "stringValue": "{\"destinationTable\": {\"projectId\": \"{{$.inputs.parameters['pipelineparam--bigquery-create-dataset-2-project_id']}}\", \"datasetId\": \"{{$.inputs.parameters['pipelineparam--bigquery-create-dataset-2-dataset_id']}}\", \"tableId\": \"metrics\"}, \"writeDisposition\": \"WRITE_EMPTY\"}" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--bigquery-create-dataset-2-dataset_id": { + "taskOutputParameter": { + "outputParameterKey": "dataset_id", + "producerTask": "bigquery-create-dataset-2" + } + }, + "pipelineparam--bigquery-create-dataset-2-project_id": { + 
"taskOutputParameter": { + "outputParameterKey": "project_id", + "producerTask": "bigquery-create-dataset-2" + } + }, + "pipelineparam--table-to-uri-2-uri": { + "taskOutputParameter": { + "outputParameterKey": "uri", + "producerTask": "table-to-uri-2" + } + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "query": { + "runtimeValue": { + "constantValue": { + "stringValue": "\n SELECT\n SUM(MAE * prediction_count) / SUM(prediction_count) AS MAE,\n SQRT(SUM(MSE * prediction_count) / SUM(prediction_count)) AS RMSE,\n SUM(MAPE * prediction_count) / SUM(prediction_count) AS MAPE,\n FROM `{{$.inputs.parameters['pipelineparam--table-to-uri-2-uri']}}`\n " + } + } + }, + "query_parameters": { + "runtimeValue": { + "constantValue": { + "stringValue": "[]" + } + } + } + } + }, + "taskInfo": { + "name": "create-backtest-table" + } + }, + "build-serialized-query-parameters": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-build-serialized-query-parameters" + }, + "inputs": { + "parameters": { + "forecast_horizon": { + "componentInputParameter": "pipelineparam--forecast_horizon" + }, + "forecast_horizon_off_by_one": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "splits": { + "runtimeValue": { + "constantValue": { + "stringValue": "[\"TEST\"]" + } + } + } + } + }, + "taskInfo": { + "name": "build-serialized-query-parameters" + } + }, + "build-serialized-query-parameters-3": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-build-serialized-query-parameters-3" + }, + "inputs": { + "parameters": { + "data_granularity_unit": { + "componentInputParameter": "pipelineparam--data_granularity_unit" + }, + "forecast_horizon": { + "componentInputParameter": "pipelineparam--forecast_horizon" + }, + "forecast_horizon_off_by_one": { + "runtimeValue": { + "constantValue": { + "stringValue": "False" + } + } + }, + "max_order": { + "componentInputParameter": 
"pipelineparam--max_order" + }, + "splits": { + "runtimeValue": { + "constantValue": { + "stringValue": "[\"TRAIN\", \"VALIDATE\", \"TEST\"]" + } + } + } + } + }, + "taskInfo": { + "name": "build-serialized-query-parameters-3" + } + }, + "build-split-query-parameters": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-build-split-query-parameters" + }, + "dependentTasks": [ + "parse-split-spec" + ], + "inputs": { + "parameters": { + "training_fraction_split": { + "taskOutputParameter": { + "outputParameterKey": "training_fraction_split", + "producerTask": "parse-split-spec" + } + }, + "validation_fraction_split": { + "taskOutputParameter": { + "outputParameterKey": "validation_fraction_split", + "producerTask": "parse-split-spec" + } + } + } + }, + "taskInfo": { + "name": "build-split-query-parameters" + } + }, + "copy-table": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-copy-table" + }, + "dependentTasks": [ + "bigquery-create-dataset-2", + "for-loop-2", + "table-to-uri-3" + ], + "inputs": { + "parameters": { + "destination": { + "runtimeValue": { + "constantValue": { + "stringValue": "{{$.inputs.parameters['pipelineparam--bigquery-create-dataset-2-project_id']}}.{{$.inputs.parameters['pipelineparam--bigquery-create-dataset-2-dataset_id']}}.evaluated_examples" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--bigquery-create-dataset-2-dataset_id": { + "taskOutputParameter": { + "outputParameterKey": "dataset_id", + "producerTask": "bigquery-create-dataset-2" + } + }, + "pipelineparam--bigquery-create-dataset-2-project_id": { + "taskOutputParameter": { + "outputParameterKey": "project_id", + "producerTask": "bigquery-create-dataset-2" + } + }, + "pipelineparam--table-to-uri-3-uri": { + "taskOutputParameter": { + "outputParameterKey": "uri", + "producerTask": "table-to-uri-3" + } + }, + "project": { + "componentInputParameter": 
"pipelineparam--project" + }, + "source": { + "runtimeValue": { + "constantValue": { + "stringValue": "{{$.inputs.parameters['pipelineparam--table-to-uri-3-uri']}}" + } + } + } + } + }, + "taskInfo": { + "name": "export-evaluated-examples-table" + } + }, + "for-loop-2": { + "componentRef": { + "name": "comp-for-loop-2" + }, + "dependentTasks": [ + "bigquery-create-dataset", + "bigquery-list-rows", + "maybe-replace-with-default-2", + "table-to-uri", + "table-to-uri-2", + "table-to-uri-3" + ], + "inputs": { + "parameters": { + "pipelineparam--bigquery-create-dataset-dataset_id": { + "taskOutputParameter": { + "outputParameterKey": "dataset_id", + "producerTask": "bigquery-create-dataset" + } + }, + "pipelineparam--bigquery-create-dataset-project_id": { + "taskOutputParameter": { + "outputParameterKey": "project_id", + "producerTask": "bigquery-create-dataset" + } + }, + "pipelineparam--bigquery-list-rows-Output": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "bigquery-list-rows" + } + }, + "pipelineparam--data_granularity_unit": { + "componentInputParameter": "pipelineparam--data_granularity_unit" + }, + "pipelineparam--forecast_horizon": { + "componentInputParameter": "pipelineparam--forecast_horizon" + }, + "pipelineparam--location": { + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--max_order": { + "componentInputParameter": "pipelineparam--max_order" + }, + "pipelineparam--maybe-replace-with-default-2-Output": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "maybe-replace-with-default-2" + } + }, + "pipelineparam--project": { + "componentInputParameter": "pipelineparam--project" + }, + "pipelineparam--table-to-uri-2-dataset_id": { + "taskOutputParameter": { + "outputParameterKey": "dataset_id", + "producerTask": "table-to-uri-2" + } + }, + "pipelineparam--table-to-uri-2-project_id": { + "taskOutputParameter": { + "outputParameterKey": "project_id", + "producerTask": 
"table-to-uri-2" + } + }, + "pipelineparam--table-to-uri-2-table_id": { + "taskOutputParameter": { + "outputParameterKey": "table_id", + "producerTask": "table-to-uri-2" + } + }, + "pipelineparam--table-to-uri-3-dataset_id": { + "taskOutputParameter": { + "outputParameterKey": "dataset_id", + "producerTask": "table-to-uri-3" + } + }, + "pipelineparam--table-to-uri-3-project_id": { + "taskOutputParameter": { + "outputParameterKey": "project_id", + "producerTask": "table-to-uri-3" + } + }, + "pipelineparam--table-to-uri-3-table_id": { + "taskOutputParameter": { + "outputParameterKey": "table_id", + "producerTask": "table-to-uri-3" + } + }, + "pipelineparam--table-to-uri-uri": { + "taskOutputParameter": { + "outputParameterKey": "uri", + "producerTask": "table-to-uri" + } + }, + "pipelineparam--target_column_name": { + "componentInputParameter": "pipelineparam--target_column_name" + }, + "pipelineparam--time_column": { + "componentInputParameter": "pipelineparam--time_column" + }, + "pipelineparam--time_series_identifier_column": { + "componentInputParameter": "pipelineparam--time_series_identifier_column" + } + } + }, + "parameterIterator": { + "itemInput": "pipelineparam--bigquery-list-rows-Output-loop-item", + "items": { + "inputParameter": "pipelineparam--bigquery-list-rows-Output" + } + }, + "taskInfo": { + "name": "for-loop-2" + } + }, + "generate-iso8601-underscore-datetime-format": { + "cachingOptions": {}, + "componentRef": { + "name": "comp-generate-iso8601-underscore-datetime-format" + }, + "dependentTasks": [ + "validate-inputs" + ], + "inputs": { + "parameters": { + "run_id": { + "runtimeValue": { + "constantValue": { + "stringValue": "{{$.pipeline_job_uuid}}" + } + } + } + } + }, + "taskInfo": { + "name": "generate-iso8601-underscore-datetime-format" + } + }, + "get-first-valid": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-get-first-valid" + }, + "dependentTasks": [ + "load-table-from-uri", + "parse-data-source" 
+ ], + "inputs": { + "parameters": { + "pipelineparam--load-table-from-uri-Output": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "load-table-from-uri" + } + }, + "pipelineparam--parse-data-source-bq_source": { + "taskOutputParameter": { + "outputParameterKey": "bq_source", + "producerTask": "parse-data-source" + } + }, + "values": { + "runtimeValue": { + "constantValue": { + "stringValue": "[\"{{$.inputs.parameters['pipelineparam--parse-data-source-bq_source']}}\", \"{{$.inputs.parameters['pipelineparam--load-table-from-uri-Output']}}\"]" + } + } + } + } + }, + "taskInfo": { + "name": "get-first-valid" + } + }, + "load-table-from-uri": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-load-table-from-uri" + }, + "dependentTasks": [ + "bigquery-create-dataset", + "parse-data-source" + ], + "inputs": { + "parameters": { + "destination": { + "runtimeValue": { + "constantValue": { + "stringValue": "{{$.inputs.parameters['pipelineparam--bigquery-create-dataset-project_id']}}.{{$.inputs.parameters['pipelineparam--bigquery-create-dataset-dataset_id']}}.csv_export" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--bigquery-create-dataset-dataset_id": { + "taskOutputParameter": { + "outputParameterKey": "dataset_id", + "producerTask": "bigquery-create-dataset" + } + }, + "pipelineparam--bigquery-create-dataset-project_id": { + "taskOutputParameter": { + "outputParameterKey": "project_id", + "producerTask": "bigquery-create-dataset" + } + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "source_format": { + "runtimeValue": { + "constantValue": { + "stringValue": "CSV" + } + } + }, + "source_uris": { + "taskOutputParameter": { + "outputParameterKey": "gcs_source", + "producerTask": "parse-data-source" + } + } + } + }, + "taskInfo": { + "name": "load-table-from-uri" + } + }, + "maybe-replace-with-default": { + 
"cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-maybe-replace-with-default" + }, + "dependentTasks": [ + "generate-iso8601-underscore-datetime-format" + ], + "inputs": { + "parameters": { + "default": { + "runtimeValue": { + "constantValue": { + "stringValue": "export_{{$.inputs.parameters['pipelineparam--generate-iso8601-underscore-datetime-format-Output']}}" + } + } + }, + "pipelineparam--generate-iso8601-underscore-datetime-format-Output": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "generate-iso8601-underscore-datetime-format" + } + }, + "value": { + "componentInputParameter": "pipelineparam--bigquery_destination_uri" + } + } + }, + "taskInfo": { + "name": "maybe-replace-with-default" + } + }, + "maybe-replace-with-default-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-maybe-replace-with-default-2" + }, + "dependentTasks": [ + "parse-split-spec" + ], + "inputs": { + "parameters": { + "default": { + "runtimeValue": { + "constantValue": { + "stringValue": "ml_use" + } + } + }, + "value": { + "taskOutputParameter": { + "outputParameterKey": "predefined_split_column", + "producerTask": "parse-split-spec" + } + } + } + }, + "taskInfo": { + "name": "maybe-replace-with-default-2" + } + }, + "maybe-replace-with-default-3": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-maybe-replace-with-default-3" + }, + "dependentTasks": [ + "parse-split-spec" + ], + "inputs": { + "parameters": { + "default": { + "runtimeValue": { + "constantValue": { + "stringValue": "NULL" + } + } + }, + "value": { + "taskOutputParameter": { + "outputParameterKey": "predefined_split_column", + "producerTask": "parse-split-spec" + } + } + } + }, + "taskInfo": { + "name": "maybe-replace-with-default-3" + } + }, + "parse-data-source": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-parse-data-source" + }, + 
"inputs": { + "parameters": { + "data_source": { + "componentInputParameter": "pipelineparam--data_source" + } + } + }, + "taskInfo": { + "name": "parse-data-source" + } + }, + "parse-split-spec": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-parse-split-spec" + }, + "inputs": { + "parameters": { + "split_spec": { + "componentInputParameter": "pipelineparam--split_spec" + } + } + }, + "taskInfo": { + "name": "parse-split-spec" + } + }, + "table-to-uri": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-table-to-uri" + }, + "dependentTasks": [ + "bigquery-query-job" + ], + "inputs": { + "artifacts": { + "table": { + "taskOutputArtifact": { + "outputArtifactKey": "destination_table", + "producerTask": "bigquery-query-job" + } + } + } + }, + "taskInfo": { + "name": "table-to-uri" + } + }, + "table-to-uri-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-table-to-uri-2" + }, + "dependentTasks": [ + "bigquery-query-job-3" + ], + "inputs": { + "artifacts": { + "table": { + "taskOutputArtifact": { + "outputArtifactKey": "destination_table", + "producerTask": "bigquery-query-job-3" + } + } + } + }, + "taskInfo": { + "name": "table-to-uri-2" + } + }, + "table-to-uri-3": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-table-to-uri-3" + }, + "dependentTasks": [ + "bigquery-query-job-4" + ], + "inputs": { + "artifacts": { + "table": { + "taskOutputArtifact": { + "outputArtifactKey": "destination_table", + "producerTask": "bigquery-query-job-4" + } + } + } + }, + "taskInfo": { + "name": "table-to-uri-3" + } + }, + "validate-inputs": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-validate-inputs" + }, + "dependentTasks": [ + "parse-data-source", + "parse-split-spec" + ], + "inputs": { + "parameters": { + "bigquery_destination_uri": { + "componentInputParameter": 
"pipelineparam--bigquery_destination_uri" + }, + "bq_source": { + "taskOutputParameter": { + "outputParameterKey": "bq_source", + "producerTask": "parse-data-source" + } + }, + "gcs_source": { + "taskOutputParameter": { + "outputParameterKey": "gcs_source", + "producerTask": "parse-data-source" + } + }, + "predefined_split_column": { + "taskOutputParameter": { + "outputParameterKey": "predefined_split_column", + "producerTask": "parse-split-spec" + } + }, + "target_column_name": { + "componentInputParameter": "pipelineparam--target_column_name" + }, + "test_fraction_split": { + "taskOutputParameter": { + "outputParameterKey": "test_fraction_split", + "producerTask": "parse-split-spec" + } + }, + "time_column": { + "componentInputParameter": "pipelineparam--time_column" + }, + "time_series_identifier_column": { + "componentInputParameter": "pipelineparam--time_series_identifier_column" + }, + "training_fraction_split": { + "taskOutputParameter": { + "outputParameterKey": "training_fraction_split", + "producerTask": "parse-split-spec" + } + }, + "validation_fraction_split": { + "taskOutputParameter": { + "outputParameterKey": "validation_fraction_split", + "producerTask": "parse-split-spec" + } + } + } + }, + "taskInfo": { + "name": "validate-inputs" + } + } + } + }, + "inputDefinitions": { + "parameters": { + "pipelineparam--bigquery_destination_uri": { + "type": "STRING" + }, + "pipelineparam--data_granularity_unit": { + "type": "STRING" + }, + "pipelineparam--data_source": { + "type": "STRING" + }, + "pipelineparam--forecast_horizon": { + "type": "INT" + }, + "pipelineparam--location": { + "type": "STRING" + }, + "pipelineparam--max_order": { + "type": "INT" + }, + "pipelineparam--project": { + "type": "STRING" + }, + "pipelineparam--split_spec": { + "type": "STRING" + }, + "pipelineparam--target_column_name": { + "type": "STRING" + }, + "pipelineparam--time_column": { + "type": "STRING" + }, + "pipelineparam--time_series_identifier_column": { + "type": "STRING" + 
} + } + } + }, + "comp-for-loop-2": { + "dag": { + "tasks": { + "bigquery-create-model-job": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-bigquery-create-model-job" + }, + "dependentTasks": [ + "build-serialized-query-parameters-2" + ], + "inputs": { + "parameters": { + "job_configuration_query": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--bigquery-create-dataset-dataset_id": { + "componentInputParameter": "pipelineparam--bigquery-create-dataset-dataset_id" + }, + "pipelineparam--bigquery-create-dataset-project_id": { + "componentInputParameter": "pipelineparam--bigquery-create-dataset-project_id" + }, + "pipelineparam--maybe-replace-with-default-2-Output": { + "componentInputParameter": "pipelineparam--maybe-replace-with-default-2-Output" + }, + "pipelineparam--table-to-uri-uri": { + "componentInputParameter": "pipelineparam--table-to-uri-uri" + }, + "pipelineparam--target_column_name": { + "componentInputParameter": "pipelineparam--target_column_name" + }, + "pipelineparam--time_column": { + "componentInputParameter": "pipelineparam--time_column" + }, + "pipelineparam--time_series_identifier_column": { + "componentInputParameter": "pipelineparam--time_series_identifier_column" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "query": { + "runtimeValue": { + "constantValue": { + "stringValue": "\n CREATE MODEL `{{$.inputs.parameters['pipelineparam--bigquery-create-dataset-project_id']}}.{{$.inputs.parameters['pipelineparam--bigquery-create-dataset-dataset_id']}}.model_{{$.pipeline_task_uuid}}`\n OPTIONS (\n model_type = 'ARIMA_PLUS',\n time_series_timestamp_col = '{{$.inputs.parameters['pipelineparam--time_column']}}',\n time_series_id_col = 
'{{$.inputs.parameters['pipelineparam--time_series_identifier_column']}}',\n time_series_data_col = '{{$.inputs.parameters['pipelineparam--target_column_name']}}',\n horizon = @forecast_horizon,\n auto_arima = True,\n auto_arima_max_order = @max_order,\n data_frequency = @data_granularity_unit,\n holiday_region = 'GLOBAL',\n clean_spikes_and_dips = True,\n adjust_step_changes = True,\n decompose_time_series = True\n ) AS\n SELECT\n {{$.inputs.parameters['pipelineparam--time_series_identifier_column']}},\n {{$.inputs.parameters['pipelineparam--time_column']}},\n {{$.inputs.parameters['pipelineparam--target_column_name']}},\n FROM `{{$.inputs.parameters['pipelineparam--table-to-uri-uri']}}`\n WHERE\n UPPER({{$.inputs.parameters['pipelineparam--maybe-replace-with-default-2-Output']}}) IN UNNEST(@splits)\n AND TIMESTAMP({{$.inputs.parameters['pipelineparam--time_column']}}) < @start_time\n " + } + } + }, + "query_parameters": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "build-serialized-query-parameters-2" + } + } + } + }, + "taskInfo": { + "name": "create-eval-model" + } + }, + "bigquery-query-job-5": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-bigquery-query-job-5" + }, + "dependentTasks": [ + "build-serialized-query-parameters-2", + "model-to-uri" + ], + "inputs": { + "parameters": { + "job_configuration_query": { + "runtimeValue": { + "constantValue": { + "stringValue": "{\"destinationTable\": {\"projectId\": \"{{$.inputs.parameters['pipelineparam--table-to-uri-2-project_id']}}\", \"datasetId\": \"{{$.inputs.parameters['pipelineparam--table-to-uri-2-dataset_id']}}\", \"tableId\": \"{{$.inputs.parameters['pipelineparam--table-to-uri-2-table_id']}}\"}, \"writeDisposition\": \"WRITE_APPEND\"}" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + 
"pipelineparam--forecast_horizon": { + "componentInputParameter": "pipelineparam--forecast_horizon" + }, + "pipelineparam--model-to-uri-uri": { + "taskOutputParameter": { + "outputParameterKey": "uri", + "producerTask": "model-to-uri" + } + }, + "pipelineparam--table-to-uri-2-dataset_id": { + "componentInputParameter": "pipelineparam--table-to-uri-2-dataset_id" + }, + "pipelineparam--table-to-uri-2-project_id": { + "componentInputParameter": "pipelineparam--table-to-uri-2-project_id" + }, + "pipelineparam--table-to-uri-2-table_id": { + "componentInputParameter": "pipelineparam--table-to-uri-2-table_id" + }, + "pipelineparam--table-to-uri-uri": { + "componentInputParameter": "pipelineparam--table-to-uri-uri" + }, + "pipelineparam--time_column": { + "componentInputParameter": "pipelineparam--time_column" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "query": { + "runtimeValue": { + "constantValue": { + "stringValue": "\n SELECT\n @start_time AS predicted_on_{{$.inputs.parameters['pipelineparam--time_column']}},\n AVG(mean_absolute_error) AS MAE,\n AVG(mean_squared_error) AS MSE,\n AVG(mean_absolute_percentage_error) AS MAPE,\n @prediction_count AS prediction_count,\n FROM ML.EVALUATE(\n MODEL `{{$.inputs.parameters['pipelineparam--model-to-uri-uri']}}`,\n TABLE `{{$.inputs.parameters['pipelineparam--table-to-uri-uri']}}`,\n STRUCT(True AS perform_aggregation, {{$.inputs.parameters['pipelineparam--forecast_horizon']}} as horizon))\n " + } + } + }, + "query_parameters": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "build-serialized-query-parameters-2" + } + } + } + }, + "taskInfo": { + "name": "append-evaluation-metrics" + } + }, + "bigquery-query-job-6": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-bigquery-query-job-6" + }, + "dependentTasks": [ + "build-serialized-query-parameters-2", + "model-to-uri" + ], + "inputs": { + "parameters": { + 
"job_configuration_query": { + "runtimeValue": { + "constantValue": { + "stringValue": "{\"destinationTable\": {\"projectId\": \"{{$.inputs.parameters['pipelineparam--table-to-uri-3-project_id']}}\", \"datasetId\": \"{{$.inputs.parameters['pipelineparam--table-to-uri-3-dataset_id']}}\", \"tableId\": \"{{$.inputs.parameters['pipelineparam--table-to-uri-3-table_id']}}\"}, \"writeDisposition\": \"WRITE_APPEND\"}" + } + } + }, + "labels": { + "runtimeValue": { + "constantValue": { + "stringValue": "{}" + } + } + }, + "location": { + "componentInputParameter": "pipelineparam--location" + }, + "pipelineparam--forecast_horizon": { + "componentInputParameter": "pipelineparam--forecast_horizon" + }, + "pipelineparam--model-to-uri-uri": { + "taskOutputParameter": { + "outputParameterKey": "uri", + "producerTask": "model-to-uri" + } + }, + "pipelineparam--table-to-uri-3-dataset_id": { + "componentInputParameter": "pipelineparam--table-to-uri-3-dataset_id" + }, + "pipelineparam--table-to-uri-3-project_id": { + "componentInputParameter": "pipelineparam--table-to-uri-3-project_id" + }, + "pipelineparam--table-to-uri-3-table_id": { + "componentInputParameter": "pipelineparam--table-to-uri-3-table_id" + }, + "pipelineparam--table-to-uri-uri": { + "componentInputParameter": "pipelineparam--table-to-uri-uri" + }, + "pipelineparam--target_column_name": { + "componentInputParameter": "pipelineparam--target_column_name" + }, + "pipelineparam--time_column": { + "componentInputParameter": "pipelineparam--time_column" + }, + "pipelineparam--time_series_identifier_column": { + "componentInputParameter": "pipelineparam--time_series_identifier_column" + }, + "project": { + "componentInputParameter": "pipelineparam--project" + }, + "query": { + "runtimeValue": { + "constantValue": { + "stringValue": "\n SELECT\n actual.{{$.inputs.parameters['pipelineparam--time_series_identifier_column']}},\n TIMESTAMP(actual.{{$.inputs.parameters['pipelineparam--time_column']}}) AS 
{{$.inputs.parameters['pipelineparam--time_column']}},\n @start_time AS predicted_on_{{$.inputs.parameters['pipelineparam--time_column']}},\n actual.{{$.inputs.parameters['pipelineparam--target_column_name']}},\n STRUCT(pred.forecast_value AS value) AS predicted_{{$.inputs.parameters['pipelineparam--target_column_name']}},\n FROM\n ML.FORECAST(\n MODEL `{{$.inputs.parameters['pipelineparam--model-to-uri-uri']}}`,\n STRUCT({{$.inputs.parameters['pipelineparam--forecast_horizon']}} AS horizon)) pred\n JOIN `{{$.inputs.parameters['pipelineparam--table-to-uri-uri']}}` actual\n ON\n pred.forecast_timestamp = TIMESTAMP(actual.{{$.inputs.parameters['pipelineparam--time_column']}})\n AND pred.{{$.inputs.parameters['pipelineparam--time_series_identifier_column']}}\n = actual.{{$.inputs.parameters['pipelineparam--time_series_identifier_column']}}\n " + } + } + }, + "query_parameters": { + "taskOutputParameter": { + "outputParameterKey": "Output", + "producerTask": "build-serialized-query-parameters-2" + } + } + } + }, + "taskInfo": { + "name": "append-evaluated-examples" + } + }, + "build-serialized-query-parameters-2": { + "cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-build-serialized-query-parameters-2" + }, + "inputs": { + "parameters": { + "data_granularity_unit": { + "componentInputParameter": "pipelineparam--data_granularity_unit" + }, + "forecast_horizon": { + "componentInputParameter": "pipelineparam--forecast_horizon" + }, + "forecast_horizon_off_by_one": { + "runtimeValue": { + "constantValue": { + "stringValue": "False" + } + } + }, + "max_order": { + "componentInputParameter": "pipelineparam--max_order" + }, + "splits": { + "runtimeValue": { + "constantValue": { + "stringValue": "[\"TRAIN\", \"VALIDATE\"]" + } + } + }, + "window": { + "componentInputParameter": "pipelineparam--bigquery-list-rows-Output-loop-item" + } + } + }, + "taskInfo": { + "name": "build-serialized-query-parameters-2" + } + }, + "model-to-uri": { + 
"cachingOptions": { + "enableCache": true + }, + "componentRef": { + "name": "comp-model-to-uri" + }, + "dependentTasks": [ + "bigquery-create-model-job" + ], + "inputs": { + "artifacts": { + "model": { + "taskOutputArtifact": { + "outputArtifactKey": "model", + "producerTask": "bigquery-create-model-job" + } + } + } + }, + "taskInfo": { + "name": "model-to-uri" + } + } + } + }, + "inputDefinitions": { + "parameters": { + "pipelineparam--bigquery-create-dataset-dataset_id": { + "type": "STRING" + }, + "pipelineparam--bigquery-create-dataset-project_id": { + "type": "STRING" + }, + "pipelineparam--bigquery-list-rows-Output": { + "type": "STRING" + }, + "pipelineparam--bigquery-list-rows-Output-loop-item": { + "type": "STRING" + }, + "pipelineparam--data_granularity_unit": { + "type": "STRING" + }, + "pipelineparam--forecast_horizon": { + "type": "INT" + }, + "pipelineparam--location": { + "type": "STRING" + }, + "pipelineparam--max_order": { + "type": "INT" + }, + "pipelineparam--maybe-replace-with-default-2-Output": { + "type": "STRING" + }, + "pipelineparam--project": { + "type": "STRING" + }, + "pipelineparam--table-to-uri-2-dataset_id": { + "type": "STRING" + }, + "pipelineparam--table-to-uri-2-project_id": { + "type": "STRING" + }, + "pipelineparam--table-to-uri-2-table_id": { + "type": "STRING" + }, + "pipelineparam--table-to-uri-3-dataset_id": { + "type": "STRING" + }, + "pipelineparam--table-to-uri-3-project_id": { + "type": "STRING" + }, + "pipelineparam--table-to-uri-3-table_id": { + "type": "STRING" + }, + "pipelineparam--table-to-uri-uri": { + "type": "STRING" + }, + "pipelineparam--target_column_name": { + "type": "STRING" + }, + "pipelineparam--time_column": { + "type": "STRING" + }, + "pipelineparam--time_series_identifier_column": { + "type": "STRING" + } + } + } + }, + "comp-generate-iso8601-underscore-datetime-format": { + "executorLabel": "exec-generate-iso8601-underscore-datetime-format", + "inputDefinitions": { + "parameters": { + "run_id": { + 
"type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "Output": { + "type": "STRING" + } + } + } + }, + "comp-get-first-valid": { + "executorLabel": "exec-get-first-valid", + "inputDefinitions": { + "parameters": { + "values": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "Output": { + "type": "STRING" + } + } + } + }, + "comp-load-table-from-uri": { + "executorLabel": "exec-load-table-from-uri", + "inputDefinitions": { + "parameters": { + "destination": { + "type": "STRING" + }, + "location": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "source_format": { + "type": "STRING" + }, + "source_uris": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "Output": { + "type": "STRING" + } + } + } + }, + "comp-maybe-replace-with-default": { + "executorLabel": "exec-maybe-replace-with-default", + "inputDefinitions": { + "parameters": { + "default": { + "type": "STRING" + }, + "value": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "Output": { + "type": "STRING" + } + } + } + }, + "comp-maybe-replace-with-default-2": { + "executorLabel": "exec-maybe-replace-with-default-2", + "inputDefinitions": { + "parameters": { + "default": { + "type": "STRING" + }, + "value": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "Output": { + "type": "STRING" + } + } + } + }, + "comp-maybe-replace-with-default-3": { + "executorLabel": "exec-maybe-replace-with-default-3", + "inputDefinitions": { + "parameters": { + "default": { + "type": "STRING" + }, + "value": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "Output": { + "type": "STRING" + } + } + } + }, + "comp-model-to-uri": { + "executorLabel": "exec-model-to-uri", + "inputDefinitions": { + "artifacts": { + "model": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + } + }, + "outputDefinitions": { + 
"parameters": { + "dataset_id": { + "type": "STRING" + }, + "model_id": { + "type": "STRING" + }, + "project_id": { + "type": "STRING" + }, + "uri": { + "type": "STRING" + } + } + } + }, + "comp-parse-data-source": { + "executorLabel": "exec-parse-data-source", + "inputDefinitions": { + "parameters": { + "data_source": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "bq_source": { + "type": "STRING" + }, + "gcs_source": { + "type": "STRING" + } + } + } + }, + "comp-parse-split-spec": { + "executorLabel": "exec-parse-split-spec", + "inputDefinitions": { + "parameters": { + "split_spec": { + "type": "STRING" + } + } + }, + "outputDefinitions": { + "parameters": { + "predefined_split_column": { + "type": "STRING" + }, + "test_fraction_split": { + "type": "DOUBLE" + }, + "training_fraction_split": { + "type": "DOUBLE" + }, + "validation_fraction_split": { + "type": "DOUBLE" + } + } + } + }, + "comp-table-to-uri": { + "executorLabel": "exec-table-to-uri", + "inputDefinitions": { + "artifacts": { + "table": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + } + }, + "outputDefinitions": { + "parameters": { + "dataset_id": { + "type": "STRING" + }, + "project_id": { + "type": "STRING" + }, + "table_id": { + "type": "STRING" + }, + "uri": { + "type": "STRING" + } + } + } + }, + "comp-table-to-uri-2": { + "executorLabel": "exec-table-to-uri-2", + "inputDefinitions": { + "artifacts": { + "table": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": "0.0.1" + } + } + } + }, + "outputDefinitions": { + "parameters": { + "dataset_id": { + "type": "STRING" + }, + "project_id": { + "type": "STRING" + }, + "table_id": { + "type": "STRING" + }, + "uri": { + "type": "STRING" + } + } + } + }, + "comp-table-to-uri-3": { + "executorLabel": "exec-table-to-uri-3", + "inputDefinitions": { + "artifacts": { + "table": { + "artifactType": { + "schemaTitle": "system.Artifact", + "schemaVersion": 
"0.0.1" + } + } + } + }, + "outputDefinitions": { + "parameters": { + "dataset_id": { + "type": "STRING" + }, + "project_id": { + "type": "STRING" + }, + "table_id": { + "type": "STRING" + }, + "uri": { + "type": "STRING" + } + } + } + }, + "comp-validate-inputs": { + "executorLabel": "exec-validate-inputs", + "inputDefinitions": { + "parameters": { + "bigquery_destination_uri": { + "type": "STRING" + }, + "bq_source": { + "type": "STRING" + }, + "gcs_source": { + "type": "STRING" + }, + "predefined_split_column": { + "type": "STRING" + }, + "target_column_name": { + "type": "STRING" + }, + "test_fraction_split": { + "type": "DOUBLE" + }, + "time_column": { + "type": "STRING" + }, + "time_series_identifier_column": { + "type": "STRING" + }, + "training_fraction_split": { + "type": "DOUBLE" + }, + "validation_fraction_split": { + "type": "DOUBLE" + } + } + } + } + }, + "deploymentSpec": { + "executors": { + "exec-bigquery-create-dataset": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "bigquery_create_dataset" + ], + "command": [ + "sh", + "-c", + "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.20.0' 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef bigquery_create_dataset(\n project: str,\n location: str,\n dataset: str,\n exists_ok: bool = False,\n) -> NamedTuple('Outputs', [('project_id', str), ('dataset_id', str)]):\n \"\"\"Creates a BigQuery dataset.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n import collections\n\n from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n client = bigquery.Client(project=project, location=location)\n ref = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n ref.project, ref.dataset_id)\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-bigquery-create-dataset-2": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "bigquery_create_dataset" + ], + "command": [ + "sh", + "-c", + "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.20.0' 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef bigquery_create_dataset(\n project: str,\n location: str,\n dataset: str,\n exists_ok: bool = False,\n) -> NamedTuple('Outputs', [('project_id', str), ('dataset_id', str)]):\n \"\"\"Creates a BigQuery dataset.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n import collections\n\n from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n client = bigquery.Client(project=project, location=location)\n ref = client.create_dataset(dataset=dataset, exists_ok=exists_ok)\n return collections.namedtuple('Outputs', ['project_id', 'dataset_id'])(\n ref.project, ref.dataset_id)\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-bigquery-create-model-job": { + "container": { + "args": [ + "--type", + "BigqueryCreateModelJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--payload", + "{\"configuration\": {\"query\": {{$.inputs.parameters['job_configuration_query']}}, \"labels\": {{$.inputs.parameters['labels']}}}}", + "--job_configuration_query_override", + "{\"query\": \"{{$.inputs.parameters['query']}}\", \"query_parameters\": {{$.inputs.parameters['query_parameters']}}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + 
"google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:0.3.0" + } + }, + "exec-bigquery-create-model-job-2": { + "container": { + "args": [ + "--type", + "BigqueryCreateModelJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--payload", + "{\"configuration\": {\"query\": {{$.inputs.parameters['job_configuration_query']}}, \"labels\": {{$.inputs.parameters['labels']}}}}", + "--job_configuration_query_override", + "{\"query\": \"{{$.inputs.parameters['query']}}\", \"query_parameters\": {{$.inputs.parameters['query_parameters']}}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:0.3.0" + } + }, + "exec-bigquery-delete-dataset-with-prefix": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "bigquery_delete_dataset_with_prefix" + ], + "command": [ + "sh", + "-c", + "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.20.0' 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef bigquery_delete_dataset_with_prefix(\n project: str,\n location: str,\n dataset_prefix: str,\n delete_contents: bool = False,\n) -> None:\n \"\"\"Deletes all BigQuery datasets matching the given prefix.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n client = bigquery.Client(project=project, location=location)\n for dataset in client.list_datasets(project=project):\n if dataset.dataset_id.startswith(dataset_prefix):\n client.delete_dataset(\n dataset=dataset.dataset_id,\n delete_contents=delete_contents)\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-bigquery-list-rows": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "bigquery_list_rows" + ], + "command": [ + "sh", + "-c", + "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.20.0' 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef bigquery_list_rows(\n project: str,\n location: str,\n table: dsl.Input[dsl.Artifact],\n) -> List[Dict[str, str]]:\n \"\"\"Lists the rows of the given BigQuery table.\n\n Args:\n project: The GCP project.\n location: The GCP region.\n table: A google.BQTable artifact.\n\n Returns:\n A list of dicts representing BigQuery rows. Rows are keyed by column, and\n all values are stored as strings.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n client = bigquery.Client(project=project, location=location)\n metadata = table.metadata\n rows = client.list_rows('.'.join(\n [metadata['projectId'], metadata['datasetId'], metadata['tableId']]))\n result = []\n for row in rows:\n result.append({col: str(value) for col, value in dict(row).items()})\n return result\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-bigquery-query-job": { + "container": { + "args": [ + "--type", + "BigqueryQueryJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--payload", + "{\"configuration\": {\"query\": {{$.inputs.parameters['job_configuration_query']}}, \"labels\": {{$.inputs.parameters['labels']}}}}", + "--job_configuration_query_override", + "{\"query\": \"{{$.inputs.parameters['query']}}\", 
\"query_parameters\": {{$.inputs.parameters['query_parameters']}}, \"destination_encryption_configuration\": {\"kmsKeyName\": \"\"}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:0.3.0" + } + }, + "exec-bigquery-query-job-2": { + "container": { + "args": [ + "--type", + "BigqueryQueryJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--payload", + "{\"configuration\": {\"query\": {{$.inputs.parameters['job_configuration_query']}}, \"labels\": {{$.inputs.parameters['labels']}}}}", + "--job_configuration_query_override", + "{\"query\": \"{{$.inputs.parameters['query']}}\", \"query_parameters\": {{$.inputs.parameters['query_parameters']}}, \"destination_encryption_configuration\": {\"kmsKeyName\": \"\"}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:0.3.0" + } + }, + "exec-bigquery-query-job-3": { + "container": { + "args": [ + "--type", + "BigqueryQueryJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--payload", + "{\"configuration\": {\"query\": {{$.inputs.parameters['job_configuration_query']}}, \"labels\": {{$.inputs.parameters['labels']}}}}", + "--job_configuration_query_override", + "{\"query\": \"{{$.inputs.parameters['query']}}\", \"query_parameters\": {{$.inputs.parameters['query_parameters']}}, \"destination_encryption_configuration\": {\"kmsKeyName\": \"\"}}", + "--gcp_resources", + 
"{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:0.3.0" + } + }, + "exec-bigquery-query-job-4": { + "container": { + "args": [ + "--type", + "BigqueryQueryJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--payload", + "{\"configuration\": {\"query\": {{$.inputs.parameters['job_configuration_query']}}, \"labels\": {{$.inputs.parameters['labels']}}}}", + "--job_configuration_query_override", + "{\"query\": \"{{$.inputs.parameters['query']}}\", \"query_parameters\": {{$.inputs.parameters['query_parameters']}}, \"destination_encryption_configuration\": {\"kmsKeyName\": \"\"}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:0.3.0" + } + }, + "exec-bigquery-query-job-5": { + "container": { + "args": [ + "--type", + "BigqueryQueryJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--payload", + "{\"configuration\": {\"query\": {{$.inputs.parameters['job_configuration_query']}}, \"labels\": {{$.inputs.parameters['labels']}}}}", + "--job_configuration_query_override", + "{\"query\": \"{{$.inputs.parameters['query']}}\", \"query_parameters\": {{$.inputs.parameters['query_parameters']}}, \"destination_encryption_configuration\": {\"kmsKeyName\": \"\"}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + 
"google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:0.3.0" + } + }, + "exec-bigquery-query-job-6": { + "container": { + "args": [ + "--type", + "BigqueryQueryJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--payload", + "{\"configuration\": {\"query\": {{$.inputs.parameters['job_configuration_query']}}, \"labels\": {{$.inputs.parameters['labels']}}}}", + "--job_configuration_query_override", + "{\"query\": \"{{$.inputs.parameters['query']}}\", \"query_parameters\": {{$.inputs.parameters['query_parameters']}}, \"destination_encryption_configuration\": {\"kmsKeyName\": \"\"}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:0.3.0" + } + }, + "exec-bigquery-query-job-7": { + "container": { + "args": [ + "--type", + "BigqueryQueryJob", + "--project", + "{{$.inputs.parameters['project']}}", + "--location", + "{{$.inputs.parameters['location']}}", + "--payload", + "{\"configuration\": {\"query\": {{$.inputs.parameters['job_configuration_query']}}, \"labels\": {{$.inputs.parameters['labels']}}}}", + "--job_configuration_query_override", + "{\"query\": \"{{$.inputs.parameters['query']}}\", \"query_parameters\": {{$.inputs.parameters['query_parameters']}}, \"destination_encryption_configuration\": {\"kmsKeyName\": \"\"}}", + "--gcp_resources", + "{{$.outputs.parameters['gcp_resources'].output_file}}", + "--executor_input", + "{{$}}" + ], + "command": [ + "python3", + "-u", + "-m", + "google_cloud_pipeline_components.container.v1.gcp_launcher.launcher" + ], + "image": "gcr.io/ml-pipeline/google-cloud-pipeline-components:0.3.0" + } + }, + 
"exec-build-serialized-query-parameters": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "build_serialized_query_parameters" + ], + "command": [ + "sh", + "-c", + "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef build_serialized_query_parameters(\n forecast_horizon: Optional[int] = None,\n forecast_horizon_off_by_one: bool = False,\n data_granularity_unit: Optional[str] = None,\n splits: Optional[List[str]] = None,\n window: Optional[Dict[str, str]] = None,\n max_order: Optional[int] = None,\n) -> list: # pylint: disable=g-bare-generic\n \"\"\"Creates configuration JSON objects for BQML queries.\n\n All query parameters will be stored in a list of QueryParameter objects:\n https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter\n\n Args:\n forecast_horizon: The number of time periods into the future for which\n forecasts will be created. Future periods start after the latest timestamp\n for each time series.\n forecast_horizon_off_by_one: If True, subtract 1 from the forecast horizon\n in the query parameters.\n data_granularity_unit: The data granularity unit. Accepted values are:\n minute, hour, day, week, month, year.\n splits: Dataset splits to be used to train the model.\n window: Dict containing information about the forecast window the model\n should have. 
If no window is provided, the window will start after the\n latest period in the available data.\n max_order: Integer between 1 and 5 representing the size of the parameter\n search space for ARIMA_PLUS. 5 would result in the highest accuracy model,\n but also the longest training runtime.\n\n Returns:\n A list of QueryParameters.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n import datetime\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n # Maps Vertex Forecasting time units to BQML time units.\n unit_map = {\n 'minute': 'per_minute',\n 'hour': 'hourly',\n 'day': 'daily',\n 'week': 'weekly',\n 'month': 'monthly',\n 'year': 'yearly',\n }\n query_parameters = []\n if data_granularity_unit is not None:\n if data_granularity_unit.lower() not in unit_map:\n raise ValueError(f'{data_granularity_unit} is not a valid time unit. '\n f'Must be one of: {\", \".join(unit_map.keys())}')\n query_parameters.append({\n 'name': 'data_granularity_unit',\n 'parameterType': {'type': 'STRING'},\n 'parameterValue': {\n 'value': unit_map[data_granularity_unit.lower()],\n },\n })\n if max_order is not None:\n query_parameters.append({\n 'name': 'max_order',\n 'parameterType': {'type': 'INTEGER'},\n 'parameterValue': {'value': str(max_order)},\n })\n if forecast_horizon is not None:\n if forecast_horizon_off_by_one:\n forecast_horizon -= 1\n query_parameters.append({\n 'name': 'forecast_horizon',\n 'parameterType': {'type': 'INTEGER'},\n 'parameterValue': {'value': str(forecast_horizon)},\n })\n if splits is not None:\n query_parameters.append({\n 'name': 'splits',\n 'parameterType': {\n 'type': 'ARRAY',\n 'arrayType': {'type': 'STRING'},\n },\n 'parameterValue': {\n 'arrayValues': [{'value': split} for split in splits],\n },\n })\n\n if window is not None:\n query_parameters.append({\n 'name': 'prediction_count',\n 'parameterType': {'type': 'INTEGER'},\n 'parameterValue': {'value': window['count']},\n })\n\n start_time = window['start_time'] 
if window else str(datetime.datetime.max)\n query_parameters.append({\n 'name': 'start_time',\n 'parameterType': {'type': 'TIMESTAMP'},\n 'parameterValue': {'value': start_time},\n })\n return query_parameters\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-build-serialized-query-parameters-2": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "build_serialized_query_parameters" + ], + "command": [ + "sh", + "-c", + "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef build_serialized_query_parameters(\n forecast_horizon: Optional[int] = None,\n forecast_horizon_off_by_one: bool = False,\n data_granularity_unit: Optional[str] = None,\n splits: Optional[List[str]] = None,\n window: Optional[Dict[str, str]] = None,\n max_order: Optional[int] = None,\n) -> list: # pylint: disable=g-bare-generic\n \"\"\"Creates configuration JSON objects for BQML queries.\n\n All query parameters will be stored in a list of QueryParameter objects:\n https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter\n\n Args:\n forecast_horizon: The number of time periods into the future for which\n forecasts will be created. Future periods start after the latest timestamp\n for each time series.\n forecast_horizon_off_by_one: If True, subtract 1 from the forecast horizon\n in the query parameters.\n data_granularity_unit: The data granularity unit. 
Accepted values are:\n minute, hour, day, week, month, year.\n splits: Dataset splits to be used to train the model.\n window: Dict containing information about the forecast window the model\n should have. If no window is provided, the window will start after the\n latest period in the available data.\n max_order: Integer between 1 and 5 representing the size of the parameter\n search space for ARIMA_PLUS. 5 would result in the highest accuracy model,\n but also the longest training runtime.\n\n Returns:\n A list of QueryParameters.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n import datetime\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n # Maps Vertex Forecasting time units to BQML time units.\n unit_map = {\n 'minute': 'per_minute',\n 'hour': 'hourly',\n 'day': 'daily',\n 'week': 'weekly',\n 'month': 'monthly',\n 'year': 'yearly',\n }\n query_parameters = []\n if data_granularity_unit is not None:\n if data_granularity_unit.lower() not in unit_map:\n raise ValueError(f'{data_granularity_unit} is not a valid time unit. 
'\n f'Must be one of: {\", \".join(unit_map.keys())}')\n query_parameters.append({\n 'name': 'data_granularity_unit',\n 'parameterType': {'type': 'STRING'},\n 'parameterValue': {\n 'value': unit_map[data_granularity_unit.lower()],\n },\n })\n if max_order is not None:\n query_parameters.append({\n 'name': 'max_order',\n 'parameterType': {'type': 'INTEGER'},\n 'parameterValue': {'value': str(max_order)},\n })\n if forecast_horizon is not None:\n if forecast_horizon_off_by_one:\n forecast_horizon -= 1\n query_parameters.append({\n 'name': 'forecast_horizon',\n 'parameterType': {'type': 'INTEGER'},\n 'parameterValue': {'value': str(forecast_horizon)},\n })\n if splits is not None:\n query_parameters.append({\n 'name': 'splits',\n 'parameterType': {\n 'type': 'ARRAY',\n 'arrayType': {'type': 'STRING'},\n },\n 'parameterValue': {\n 'arrayValues': [{'value': split} for split in splits],\n },\n })\n\n if window is not None:\n query_parameters.append({\n 'name': 'prediction_count',\n 'parameterType': {'type': 'INTEGER'},\n 'parameterValue': {'value': window['count']},\n })\n\n start_time = window['start_time'] if window else str(datetime.datetime.max)\n query_parameters.append({\n 'name': 'start_time',\n 'parameterType': {'type': 'TIMESTAMP'},\n 'parameterValue': {'value': start_time},\n })\n return query_parameters\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-build-serialized-query-parameters-3": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "build_serialized_query_parameters" + ], + "command": [ + "sh", + "-c", + "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef build_serialized_query_parameters(\n forecast_horizon: Optional[int] = None,\n forecast_horizon_off_by_one: bool = False,\n data_granularity_unit: Optional[str] = None,\n splits: Optional[List[str]] = None,\n window: Optional[Dict[str, str]] = None,\n max_order: Optional[int] = None,\n) -> list: # pylint: disable=g-bare-generic\n \"\"\"Creates configuration JSON objects for BQML queries.\n\n All query parameters will be stored in a list of QueryParameter objects:\n https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter\n\n Args:\n forecast_horizon: The number of time periods into the future for which\n forecasts will be created. Future periods start after the latest timestamp\n for each time series.\n forecast_horizon_off_by_one: If True, subtract 1 from the forecast horizon\n in the query parameters.\n data_granularity_unit: The data granularity unit. Accepted values are:\n minute, hour, day, week, month, year.\n splits: Dataset splits to be used to train the model.\n window: Dict containing information about the forecast window the model\n should have. If no window is provided, the window will start after the\n latest period in the available data.\n max_order: Integer between 1 and 5 representing the size of the parameter\n search space for ARIMA_PLUS. 
5 would result in the highest accuracy model,\n but also the longest training runtime.\n\n Returns:\n A list of QueryParameters.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n import datetime\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n # Maps Vertex Forecasting time units to BQML time units.\n unit_map = {\n 'minute': 'per_minute',\n 'hour': 'hourly',\n 'day': 'daily',\n 'week': 'weekly',\n 'month': 'monthly',\n 'year': 'yearly',\n }\n query_parameters = []\n if data_granularity_unit is not None:\n if data_granularity_unit.lower() not in unit_map:\n raise ValueError(f'{data_granularity_unit} is not a valid time unit. '\n f'Must be one of: {\", \".join(unit_map.keys())}')\n query_parameters.append({\n 'name': 'data_granularity_unit',\n 'parameterType': {'type': 'STRING'},\n 'parameterValue': {\n 'value': unit_map[data_granularity_unit.lower()],\n },\n })\n if max_order is not None:\n query_parameters.append({\n 'name': 'max_order',\n 'parameterType': {'type': 'INTEGER'},\n 'parameterValue': {'value': str(max_order)},\n })\n if forecast_horizon is not None:\n if forecast_horizon_off_by_one:\n forecast_horizon -= 1\n query_parameters.append({\n 'name': 'forecast_horizon',\n 'parameterType': {'type': 'INTEGER'},\n 'parameterValue': {'value': str(forecast_horizon)},\n })\n if splits is not None:\n query_parameters.append({\n 'name': 'splits',\n 'parameterType': {\n 'type': 'ARRAY',\n 'arrayType': {'type': 'STRING'},\n },\n 'parameterValue': {\n 'arrayValues': [{'value': split} for split in splits],\n },\n })\n\n if window is not None:\n query_parameters.append({\n 'name': 'prediction_count',\n 'parameterType': {'type': 'INTEGER'},\n 'parameterValue': {'value': window['count']},\n })\n\n start_time = window['start_time'] if window else str(datetime.datetime.max)\n query_parameters.append({\n 'name': 'start_time',\n 'parameterType': {'type': 'TIMESTAMP'},\n 'parameterValue': {'value': start_time},\n })\n return 
query_parameters\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-build-split-query-parameters": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "build_split_query_parameters" + ], + "command": [ + "sh", + "-c", + "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef build_split_query_parameters(\n training_fraction_split: float = 0,\n validation_fraction_split: float = 0,\n) -> list: # pylint: disable=g-bare-generic\n \"\"\"Creates configuration JSON objects for the split column query.\n\n All query parameters will be stored in a list of QueryParameter objects:\n https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter\n\n Args:\n training_fraction_split: Fraction of the data to use for training.\n validation_fraction_split: Fraction of the data to use for validation.\n\n Returns:\n A list of QueryParameters.\n \"\"\"\n return [\n {\n 'name': 'train_percent',\n 'parameterType': {'type': 'INTEGER'},\n 'parameterValue': {'value': str(int(training_fraction_split * 100))},\n },\n {\n 'name': 'validation_percent',\n 'parameterType': {'type': 'INTEGER'},\n 'parameterValue': {\n 'value': str(int(\n (training_fraction_split + validation_fraction_split) * 100)),\n },\n },\n ]\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-copy-table": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "copy_table" + ], + "command": [ + "sh", + "-c", + 
"\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.20.0' 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef copy_table(\n project: str,\n location: str,\n source: str,\n destination: str,\n):\n \"\"\"Copy one table to another table.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n client = bigquery.Client(project=project, location=location)\n client.copy_table(source, destination).result()\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-generate-iso8601-underscore-datetime-format": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "generate_iso8601_underscore_datetime_format" + ], + "command": [ + "sh", + "-c", + "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef generate_iso8601_underscore_datetime_format(run_id: str) -> str:\n \"\"\"Creates a timestamp using the same logic as Vertex Forecasting.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n import datetime\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n timestamp = datetime.datetime.now().strftime('%Y_%m_%dT%H_%M_%S_%f')[:23]\n return f'{run_id}_{timestamp}Z'\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-get-first-valid": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "get_first_valid" + ], + "command": [ + "sh", + "-c", + "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef get_first_valid(values: str) -> str:\n \"\"\"Returns the first truthy value from the given serialized JSON list.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n for value in json.loads(values):\n if value:\n return value\n raise ValueError('No valid values.')\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-load-table-from-uri": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "load_table_from_uri" + ], + "command": [ + "sh", + "-c", + "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'google-cloud-bigquery==2.20.0' 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef load_table_from_uri(\n project: str,\n location: str,\n source_uris: List[str],\n destination: str,\n source_format: str = 'CSV',\n) -> str:\n \"\"\"Creates a table from a list of URIs.\n\n Args:\n project: The GCP project.\n location: The GCP region.\n source_uris: URIs of data files to be loaded; in format\n gs://<bucket_name>/<object_name_or_glob>.\n destination: Table into which data is to be loaded.\n source_format: The file format for the files being imported. Only CSV is\n supported.\n\n Returns:\n The destination table containing imported data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n from google.cloud import bigquery\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n if not source_uris:\n return ''\n\n client = bigquery.Client(project=project, location=location)\n job_config = bigquery.LoadJobConfig(\n autodetect=True, source_format=source_format)\n client.load_table_from_uri(\n source_uris=source_uris,\n destination=destination,\n project=project,\n location=location,\n job_config=job_config).result()\n return destination\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-maybe-replace-with-default": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "maybe_replace_with_default" + ], + "command": [ + "sh", + "-c", + "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef maybe_replace_with_default(value: str, default: str = '') -> str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\n return default if not value or value == '-' else value\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-maybe-replace-with-default-2": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "maybe_replace_with_default" + ], + "command": [ + "sh", + "-c", + "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef maybe_replace_with_default(value: str, default: str = '') -> str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\n return default if not value or value == '-' else value\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-maybe-replace-with-default-3": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "maybe_replace_with_default" + ], + "command": [ + "sh", + "-c", + "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef maybe_replace_with_default(value: str, default: str = '') -> str:\n \"\"\"Replaces string with another value if it is a dash.\"\"\"\n return default if not value or value == '-' else value\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-model-to-uri": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "model_to_uri" + ], + "command": [ + "sh", + "-c", + "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef model_to_uri(\n model: dsl.Input[dsl.Artifact],\n) -> NamedTuple('Outputs', [\n ('project_id', str),\n ('dataset_id', str),\n ('model_id', str),\n ('uri', str),\n]):\n \"\"\"Converts a google.BQMLModel to a URI.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n outputs = [\n model.metadata['projectId'],\n 
model.metadata['datasetId'],\n model.metadata['modelId'],\n ]\n outputs.append('.'.join(outputs))\n return collections.namedtuple(\n 'Outputs',\n ['project_id', 'dataset_id', 'model_id', 'uri'],\n )(*outputs)\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-parse-data-source": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "parse_data_source" + ], + "command": [ + "sh", + "-c", + "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef parse_data_source(\n data_source: Dict[str, Dict[str, Union[str, List[str]]]],\n) -> NamedTuple('Outputs', [\n ('bq_source', str),\n ('gcs_source', List[str]),\n]):\n \"\"\"Converts the data source JSON into flat arguments.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n import collections\n import re\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n result = {}\n if 'big_query_data_source' in data_source:\n result['bq_source'] = re.sub(\n '^bq://',\n '',\n data_source['big_query_data_source']['big_query_table_path'])\n if 'csv_data_source' in data_source:\n result['gcs_source'] = data_source['csv_data_source']['csv_filenames']\n return collections.namedtuple('Outputs', ['bq_source', 'gcs_source'])(\n result.get('bq_source', ''), result.get('gcs_source', []))\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-parse-split-spec": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + 
"parse_split_spec" + ], + "command": [ + "sh", + "-c", + "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef parse_split_spec(\n split_spec: Dict[str, Dict[str, Union[str, float]]],\n) -> NamedTuple('Outputs', [\n ('training_fraction_split', float),\n ('validation_fraction_split', float),\n ('test_fraction_split', float),\n ('predefined_split_column', str),\n\n]):\n \"\"\"Extracts the split column from a split spec JSON.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n result = {}\n if 'predefined_split' in split_spec:\n result['predefined_split_column'] = split_spec['predefined_split']['key']\n if 'fraction_split' in split_spec:\n fractions = split_spec['fraction_split']\n result['training_fraction_split'] = fractions['training_fraction']\n result['validation_fraction_split'] = fractions['validation_fraction']\n result['test_fraction_split'] = fractions['test_fraction']\n return collections.namedtuple(\n 'Outputs',\n [\n 'training_fraction_split',\n 'validation_fraction_split',\n 'test_fraction_split',\n 'predefined_split_column',\n ],\n )(\n result.get('training_fraction_split', 0.0),\n result.get('validation_fraction_split', 0.0),\n result.get('test_fraction_split', 0.0),\n result.get('predefined_split_column', ''),\n )\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-table-to-uri": { + "container": { + "args": [ + 
"--executor_input", + "{{$}}", + "--function_to_execute", + "table_to_uri" + ], + "command": [ + "sh", + "-c", + "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n) -> NamedTuple('Outputs', [\n ('project_id', str),\n ('dataset_id', str),\n ('table_id', str),\n ('uri', str),\n]):\n \"\"\"Converts a google.BQTable to a URI.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n table.metadata['tableId'],\n ]\n outputs.append('.'.join(outputs))\n return collections.namedtuple(\n 'Outputs',\n ['project_id', 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-table-to-uri-2": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "table_to_uri" + ], + "command": [ + "sh", + "-c", + "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n) -> NamedTuple('Outputs', [\n ('project_id', str),\n ('dataset_id', str),\n ('table_id', str),\n ('uri', str),\n]):\n \"\"\"Converts a google.BQTable to a URI.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n table.metadata['tableId'],\n ]\n outputs.append('.'.join(outputs))\n return collections.namedtuple(\n 'Outputs',\n ['project_id', 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-table-to-uri-3": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "table_to_uri" + ], + "command": [ + "sh", + "-c", + "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef table_to_uri(\n table: dsl.Input[dsl.Artifact],\n) -> NamedTuple('Outputs', [\n ('project_id', str),\n ('dataset_id', str),\n ('table_id', str),\n ('uri', str),\n]):\n \"\"\"Converts a google.BQTable to a URI.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n import collections\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n outputs = [\n table.metadata['projectId'],\n table.metadata['datasetId'],\n table.metadata['tableId'],\n ]\n outputs.append('.'.join(outputs))\n return collections.namedtuple(\n 'Outputs',\n ['project_id', 'dataset_id', 'table_id', 'uri'],\n )(*outputs)\n\n" + ], + "image": "python:3.7-slim" + } + }, + "exec-validate-inputs": { + "container": { + "args": [ + "--executor_input", + "{{$}}", + "--function_to_execute", + "validate_inputs" + ], + "command": [ + "sh", + "-c", + "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==1.8.11' && \"$0\" \"$@\"\n", + "sh", + "-ec", + "program_path=$(mktemp -d)\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\npython3 -m kfp.v2.components.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n", + "\nimport kfp\nfrom kfp.v2 import dsl\nfrom kfp.v2.dsl import *\nfrom typing import *\n\ndef validate_inputs(\n time_column: Optional[str] = None,\n time_series_identifier_column: Optional[str] = None,\n target_column_name: Optional[str] = None,\n bq_source: Optional[str] = None,\n training_fraction_split: Optional[float] = None,\n validation_fraction_split: Optional[float] = None,\n test_fraction_split: Optional[float] = None,\n predefined_split_column: Optional[str] = None,\n gcs_source: Optional[List[str]] = None,\n source_model_uri: Optional[str] = None,\n bigquery_destination_uri: Optional[str] = None,\n) -> None:\n \"\"\"Checks training pipeline input parameters are valid.\"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel\n import re\n # pylint: enable=g-import-not-at-top,import-outside-toplevel\n\n project_pattern = r'([a-z0-9.-]+:)?[a-z][a-z0-9-_]{4,28}[a-z0-9]'\n dataset_pattern = r'.[a-zA-Z0-9_]+'\n table_pattern = r'.[^\\.\\:`]+'\n\n # Validate BigQuery column and dataset names.\n bigquery_column_parameters = [\n time_column,\n time_series_identifier_column,\n target_column_name,\n ]\n column_pattern = re.compile(r'[a-zA-Z_][a-zA-Z0-9_]{1,300}')\n for column in bigquery_column_parameters:\n if column is not None and not column_pattern.fullmatch(column):\n raise ValueError(f'Invalid column name: {column}.')\n dataset_uri_pattern = re.compile(project_pattern + dataset_pattern)\n if (\n bigquery_destination_uri != '-'\n and bigquery_destination_uri is not None\n 
and not dataset_uri_pattern.fullmatch(bigquery_destination_uri)\n ):\n raise ValueError(\n f'Invalid BigQuery dataset URI: {bigquery_destination_uri}.')\n table_uri_pattern = re.compile(\n project_pattern + dataset_pattern + table_pattern)\n if (\n source_model_uri is not None\n and not table_uri_pattern.fullmatch(source_model_uri)\n ):\n raise ValueError(f'Invalid BigQuery table URI: {source_model_uri}.')\n\n # Validate data source.\n data_source_count = sum([bool(source) for source in [bq_source, gcs_source]])\n if data_source_count > 1:\n raise ValueError(f'Expected 1 data source, found {data_source_count}.')\n if bq_source and not table_uri_pattern.fullmatch(bq_source):\n raise ValueError(f'Invalid BigQuery table URI: {bq_source}.')\n gcs_path_pattern = re.compile(r'gs:\\/\\/(.+)\\/([^\\/]+)')\n if gcs_source:\n for gcs_path in gcs_source:\n if not gcs_path_pattern.fullmatch(gcs_path):\n raise ValueError(f'Invalid path to CSV stored in GCS: {gcs_path}.')\n\n # Validate split spec.\n fraction_splits = [\n training_fraction_split,\n validation_fraction_split,\n test_fraction_split,\n ]\n split_count = sum(\n [bool(source)\n for source in [predefined_split_column, any(fraction_splits)]])\n if split_count > 1:\n raise ValueError(f'Expected 1 split type, found {split_count}.')\n if (\n predefined_split_column\n and not column_pattern.fullmatch(predefined_split_column)\n ):\n raise ValueError(f'Invalid column name: {predefined_split_column}.')\n if any(fraction_splits):\n if not all(fraction_splits):\n raise ValueError(\n f'All fractions must be non-zero. Got: {fraction_splits}.')\n if sum(fraction_splits) != 1:\n raise ValueError(\n f'Fraction splits must sum to 1. 
Got: {sum(fraction_splits)}.')\n\n" + ], + "image": "python:3.7-slim" + } + } + } + }, + "pipelineInfo": { + "name": "automl-tabular-bqml-arima-train" + }, + "root": { + "dag": { + "tasks": { + "bigquery-delete-dataset-with-prefix": { + "componentRef": { + "name": "comp-bigquery-delete-dataset-with-prefix" + }, + "dependentTasks": [ + "exit-handler-1" + ], + "inputs": { + "parameters": { + "dataset_prefix": { + "runtimeValue": { + "constantValue": { + "stringValue": "tmp_{{$.pipeline_job_uuid}}" + } + } + }, + "delete_contents": { + "runtimeValue": { + "constantValue": { + "intValue": "1" + } + } + }, + "location": { + "componentInputParameter": "location" + }, + "project": { + "componentInputParameter": "project" + } + } + }, + "taskInfo": { + "name": "delete-tmp-dataset" + }, + "triggerPolicy": { + "strategy": "ALL_UPSTREAM_TASKS_COMPLETED" + } + }, + "exit-handler-1": { + "componentRef": { + "name": "comp-exit-handler-1" + }, + "inputs": { + "parameters": { + "pipelineparam--bigquery_destination_uri": { + "componentInputParameter": "bigquery_destination_uri" + }, + "pipelineparam--data_granularity_unit": { + "componentInputParameter": "data_granularity_unit" + }, + "pipelineparam--data_source": { + "componentInputParameter": "data_source" + }, + "pipelineparam--forecast_horizon": { + "componentInputParameter": "forecast_horizon" + }, + "pipelineparam--location": { + "componentInputParameter": "location" + }, + "pipelineparam--max_order": { + "componentInputParameter": "max_order" + }, + "pipelineparam--project": { + "componentInputParameter": "project" + }, + "pipelineparam--split_spec": { + "componentInputParameter": "split_spec" + }, + "pipelineparam--target_column_name": { + "componentInputParameter": "target_column_name" + }, + "pipelineparam--time_column": { + "componentInputParameter": "time_column" + }, + "pipelineparam--time_series_identifier_column": { + "componentInputParameter": "time_series_identifier_column" + } + } + }, + "taskInfo": { + "name": 
"exit-handler-1" + } + } + } + }, + "inputDefinitions": { + "parameters": { + "bigquery_destination_uri": { + "type": "STRING" + }, + "data_granularity_unit": { + "type": "STRING" + }, + "data_source": { + "type": "STRING" + }, + "forecast_horizon": { + "type": "INT" + }, + "location": { + "type": "STRING" + }, + "max_order": { + "type": "INT" + }, + "override_destination": { + "type": "STRING" + }, + "project": { + "type": "STRING" + }, + "split_spec": { + "type": "STRING" + }, + "target_column_name": { + "type": "STRING" + }, + "time_column": { + "type": "STRING" + }, + "time_series_identifier_column": { + "type": "STRING" + } + } + } + }, + "schemaVersion": "2.0.0", + "sdkVersion": "kfp-1.8.11" + }, + "runtimeConfig": { + "parameters": { + "bigquery_destination_uri": { + "stringValue": "-" + }, + "data_source": { + "stringValue": "{\"big_query_data_source\": {\"big_query_table_path\": \"bq://[PROJECT].[DATASET].[TABLE]\"}}" + }, + "max_order": { + "intValue": "5" + }, + "override_destination": { + "stringValue": "False" + }, + "split_spec": { + "stringValue": "{\"fraction_split\": {\"test_fraction\": 0.1, \"training_fraction\": 0.8, \"validation_fraction\": 0.1}}" + } + } + } +} \ No newline at end of file diff --git a/components/google-cloud/google_cloud_pipeline_components/experimental/automl/forecasting/utils.py b/components/google-cloud/google_cloud_pipeline_components/experimental/automl/forecasting/utils.py new file mode 100644 index 000000000000..b0c0ea2115f7 --- /dev/null +++ b/components/google-cloud/google_cloud_pipeline_components/experimental/automl/forecasting/utils.py @@ -0,0 +1,150 @@ +"""Util functions for Vertex Forecasting pipelines.""" + +import os +import pathlib +from typing import Any, Dict, List, Optional, Tuple, Union + + +def get_bqml_arima_train_pipeline_and_parameters( + project: str, + location: str, + time_column: str, + time_series_identifier_column: str, + target_column_name: str, + forecast_horizon: int, + data_granularity_unit: 
str, + data_source: Dict[str, Dict[str, Union[List[str], str]]], + split_spec: Optional[Dict[str, Dict[str, Union[str, float]]]] = None, + bigquery_destination_uri: str = '', + override_destination: bool = False, + max_order: int = 5, +) -> Tuple[str, Dict[str, Any]]: + """Get the BQML ARIMA_PLUS training pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + time_column: Name of the column that identifies time order in the time + series. + time_series_identifier_column: Name of the column that identifies the time + series. + target_column_name: Name of the column that the model is to predict values + for. + forecast_horizon: The number of time periods into the future for which + forecasts will be created. Future periods start after the latest timestamp + for each time series. + data_granularity_unit: The data granularity unit. Accepted values are: + minute, hour, day, week, month, year. + data_source: Serialized JSON with URI of BigQuery table containing training + data. This table should be provided in a JSON object that looks like: + { + "big_query_data_source": { + "big_query_table_path": "bq://[PROJECT].[DATASET].[TABLE]" + } + } + or + { + "csv_data_source": { + "csv_filenames": [ [GCS_PATHS] ], + } + split_spec: Serialized JSON with name of the column containing the dataset + each row belongs to. Valid values in this column are: TRAIN, VALIDATE, and + TEST. This column should be provided in a JSON object that looks like: + {"predefined_split": {"key": "[SPLIT_COLUMN]"}} + or + { + 'fraction_split': { + 'training_fraction': 0.8, + 'validation_fraction': 0.1, + 'test_fraction': 0.1, + }, + } + bigquery_destination_uri: URI of the desired destination dataset. If not + specified, resources will be created under a new dataset in the project. 
+ Unlike in Vertex Forecasting, all resources will be given hardcoded names + under this dataset, and the model artifact will also be exported here. + override_destination: Whether to override a + model or table if it already exists. If False and the resource exists, the + training job will fail. + max_order: Integer between 1 and 5 representing the size of the parameter + search space for ARIMA_PLUS. 5 would result in the highest accuracy model, + but also the longest training runtime. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + if split_spec is None: + split_spec = { + 'fraction_split': { + 'training_fraction': 0.8, + 'validation_fraction': 0.1, + 'test_fraction': 0.1, + }, + } + parameter_values = { + 'project': project, + 'location': location, + 'time_column': time_column, + 'time_series_identifier_column': time_series_identifier_column, + 'target_column_name': target_column_name, + 'forecast_horizon': forecast_horizon, + 'data_granularity_unit': data_granularity_unit, + 'data_source': data_source, + 'split_spec': split_spec, + 'bigquery_destination_uri': bigquery_destination_uri, + 'override_destination': override_destination, + 'max_order': max_order, + } + pipeline_definition_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), + 'bqml_arima_train_pipeline.json') + return pipeline_definition_path, parameter_values + + +def get_bqml_arima_predict_pipeline_and_parameters( + project: str, + location: str, + model_name: str, + data_source: Dict[str, Dict[str, Union[List[str], str]]], + bigquery_destination_uri: str = '', + generate_explanation: bool = False, +) -> Tuple[str, Dict[str, Any]]: + """Get the BQML ARIMA_PLUS prediction pipeline. + + Args: + project: The GCP project that runs the pipeline components. + location: The GCP region that runs the pipeline components. + model_name: ARIMA_PLUS BQML model URI. + data_source: Serialized JSON with URI of BigQuery table containing input + data.
This table should be provided in a JSON object that looks like: + { + "big_query_data_source": { + "big_query_table_path": "bq://[PROJECT].[DATASET].[TABLE]" + } + } + or + { + "csv_data_source": { + "csv_filenames": [ [GCS_PATHS] ], + } + bigquery_destination_uri: URI of the desired destination dataset. If not + specified, a resource will be created under a new dataset in the project. + generate_explanation: Generate explanation along with the batch prediction + results. This will cause the batch prediction output to include + explanations. + + Returns: + Tuple of pipeline_definition_path and parameter_values. + """ + parameter_values = { + 'project': project, + 'location': location, + 'model_name': model_name, + 'data_source': data_source, + 'bigquery_destination_uri': bigquery_destination_uri, + 'generate_explanation': generate_explanation, + } + pipeline_definition_path = os.path.join( + pathlib.Path(__file__).parent.resolve(), + 'bqml_arima_predict_pipeline.json') + return pipeline_definition_path, parameter_values diff --git a/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.json b/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.json index 473a8812fbfb..cfb2ee464a1f 100644 --- a/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.json +++ b/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.json @@ -1073,6 +1073,9 @@ } }, "parameters": { + "downsampled_test_split_json": { + "type": "STRING" + }, "gcp_resources": { "type": "STRING" }, @@ -1211,7 +1214,7 @@ "--gcp_resources", "{{$.outputs.parameters['gcp_resources'].output_file}}", "--payload", - "{\"display_name\": \"tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\",
\"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"stats_generator\",\"--train_spec={\\\"prediction_type\\\": \\\"{{$.inputs.parameters['prediction_type']}}\\\", \\\"target_column\\\": \\\"{{$.inputs.parameters['target_column_name']}}\\\", \\\"optimization_objective\\\": \\\"{{$.inputs.parameters['optimization_objective']}}\\\", \\\"weight_column_name\\\": \\\"{{$.inputs.parameters['weight_column_name']}}\\\", \\\"transformations\\\": {{$.inputs.parameters['transformations']}}}\", \"--split_spec={{$.inputs.parameters['split_spec']}}\", \"--data_source={{$.inputs.parameters['data_source']}}\", \"--target_column={{$.inputs.parameters['target_column_name']}}\", \"--request_type=COLUMN_STATS_ONLY\", \"--optimization_objective_recall_value={{$.inputs.parameters['optimization_objective_recall_value']}}\", \"--optimization_objective_precision_value={{$.inputs.parameters['optimization_objective_precision_value']}}\", \"--example_gen_gcs_output_prefix={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/example_gen_output\", \"--dataset_stats_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/stats/\", \"--stats_result_path={{$.outputs.artifacts['dataset_stats'].uri}}\", \"--dataset_schema_path={{$.outputs.artifacts['dataset_schema'].uri}}\", \"--job_name=tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"--dataflow_project={{$.inputs.parameters['project']}}\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", 
\"--dataflow_staging_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", \"--dataflow_tmp_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers={{$.inputs.parameters['dataflow_max_num_workers']}}\", \"--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:prod\", \"--dataflow_machine_type={{$.inputs.parameters['dataflow_machine_type']}}\", \"--dataflow_disk_size_gb={{$.inputs.parameters['dataflow_disk_size_gb']}}\", \"--dataflow_kms_key={{$.inputs.parameters['encryption_spec_key_name']}}\", \"--dataflow_subnetwork_fully_qualified={{$.inputs.parameters['dataflow_subnetwork']}}\", \"--dataflow_use_public_ips={{$.inputs.parameters['dataflow_use_public_ips']}}\", \"--is_distill={{$.inputs.parameters['is_distill']}}\", \"--metadata_path={{$.outputs.artifacts['metadata'].uri}}\", \"--train_split={{$.outputs.artifacts['train_split'].uri}}\", \"--eval_split={{$.outputs.artifacts['eval_split'].uri}}\", \"--test_split={{$.outputs.artifacts['test_split'].uri}}\", \"--test_split_for_batch_prediction_component={{$.outputs.parameters['test_split_json'].output_file}}\", \"--instance_baseline_path={{$.outputs.artifacts['instance_baseline'].uri}}\", \"--parse_json=true\"]}}]}}" + "{\"display_name\": \"tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": {\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:prod\", \"args\": [\"stats_generator\",\"--train_spec={\\\"prediction_type\\\": \\\"{{$.inputs.parameters['prediction_type']}}\\\", \\\"target_column\\\": \\\"{{$.inputs.parameters['target_column_name']}}\\\", 
\\\"optimization_objective\\\": \\\"{{$.inputs.parameters['optimization_objective']}}\\\", \\\"weight_column_name\\\": \\\"{{$.inputs.parameters['weight_column_name']}}\\\", \\\"transformations\\\": {{$.inputs.parameters['transformations']}}}\", \"--split_spec={{$.inputs.parameters['split_spec']}}\", \"--data_source={{$.inputs.parameters['data_source']}}\", \"--target_column={{$.inputs.parameters['target_column_name']}}\", \"--request_type=COLUMN_STATS_ONLY\", \"--optimization_objective_recall_value={{$.inputs.parameters['optimization_objective_recall_value']}}\", \"--optimization_objective_precision_value={{$.inputs.parameters['optimization_objective_precision_value']}}\", \"--example_gen_gcs_output_prefix={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/example_gen_output\", \"--dataset_stats_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/stats/\", \"--stats_result_path={{$.outputs.artifacts['dataset_stats'].uri}}\", \"--dataset_schema_path={{$.outputs.artifacts['dataset_schema'].uri}}\", \"--job_name=tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"--dataflow_project={{$.inputs.parameters['project']}}\", \"--error_file_path={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--dataflow_staging_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", \"--dataflow_tmp_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", \"--dataflow_max_num_workers={{$.inputs.parameters['dataflow_max_num_workers']}}\", \"--dataflow_worker_container_image=us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:prod\", \"--dataflow_machine_type={{$.inputs.parameters['dataflow_machine_type']}}\", \"--dataflow_disk_size_gb={{$.inputs.parameters['dataflow_disk_size_gb']}}\", 
\"--dataflow_kms_key={{$.inputs.parameters['encryption_spec_key_name']}}\", \"--dataflow_subnetwork_fully_qualified={{$.inputs.parameters['dataflow_subnetwork']}}\", \"--dataflow_use_public_ips={{$.inputs.parameters['dataflow_use_public_ips']}}\", \"--is_distill={{$.inputs.parameters['is_distill']}}\", \"--metadata_path={{$.outputs.artifacts['metadata'].uri}}\", \"--train_split={{$.outputs.artifacts['train_split'].uri}}\", \"--eval_split={{$.outputs.artifacts['eval_split'].uri}}\", \"--test_split={{$.outputs.artifacts['test_split'].uri}}\", \"--test_split_for_batch_prediction_component={{$.outputs.parameters['test_split_json'].output_file}}\", \"--downsampled_test_split_for_batch_prediction_component={{$.outputs.parameters['downsampled_test_split_json'].output_file}}\", \"--instance_baseline_path={{$.outputs.artifacts['instance_baseline'].uri}}\", \"--parse_json=true\", \"--generate_additional_downsample_test_split=true\"]}}]}}" ], "command": [ "python3", @@ -1521,4 +1524,4 @@ } } } -} +} \ No newline at end of file diff --git a/components/google-cloud/setup.py b/components/google-cloud/setup.py index 16260a737eeb..be6c628ea6a6 100644 --- a/components/google-cloud/setup.py +++ b/components/google-cloud/setup.py @@ -81,7 +81,7 @@ x.replace(relative_data_path + "/", "") for x in glob.glob(relative_data_path + "/**/*.yaml", recursive=True) + glob.glob( - relative_data_path + "/**/automl/tabular/*.json", + relative_data_path + "/**/automl/**/*.json", recursive=True) ] }, From 2494447578657a7a6a88a1cc8df2c8a811d80c40 Mon Sep 17 00:00:00 2001 From: Connor McCarthy <mccarthy.connor.james@gmail.com> Date: Thu, 14 Apr 2022 15:28:38 -0600 Subject: [PATCH 4/7] feat(sdk): add .list_pipeline_versions and .unarchive_experiment methods to Client (#7563) * add client methods * update release notes --- sdk/RELEASE.md | 1 + sdk/python/kfp/client/client.py | 30 ++++++++++++++++++++++++++---- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/sdk/RELEASE.md 
b/sdk/RELEASE.md index 794727d5383a..b1124f5587ad 100644 --- a/sdk/RELEASE.md +++ b/sdk/RELEASE.md @@ -1,6 +1,7 @@ # Current Version (Still in Development) ## Major Features and Improvements +* feat(sdk): add .list_pipeline_versions and .unarchive_experiment methods to Client [\#7563](https://github.com/kubeflow/pipelines/pull/7563) ## Breaking Changes diff --git a/sdk/python/kfp/client/client.py b/sdk/python/kfp/client/client.py index 371b33c73bfc..0f787ce93fd2 100644 --- a/sdk/python/kfp/client/client.py +++ b/sdk/python/kfp/client/client.py @@ -585,16 +585,21 @@ def get_experiment(self, experiment_name)) return result.experiments[0] - def archive_experiment(self, experiment_id: str): + def archive_experiment(self, experiment_id: str) -> None: """Archives an experiment. Args: experiment_id: id of the experiment. + """ + self._experiment_api.archive_experiment(id=experiment_id) - Raises: - kfp_server_api.ApiException: If experiment is not found. + def unarchive_experiment(self, experiment_id: str) -> None: + """Unarchives an experiment. + + Args: + experiment_id: id of the experiment. """ - self._experiment_api.archive_experiment(experiment_id) + self._experiment_api.unarchive_experiment(id=experiment_id) def delete_experiment(self, experiment_id): """Delete experiment. @@ -1486,6 +1491,23 @@ def list_pipeline_versions( resource_key_id=pipeline_id, filter=filter) + def get_pipeline_version( + self, version_id: str + ) -> kfp_server_api.models.api_pipeline_version.ApiPipelineVersion: + """Gets a pipeline version. + + Args: + version_id: id of the pipeline version. + + Returns: + Object. If the method is called asynchronously, returns the request + thread. + + Raises: + kfp_server_api.ApiException: If pipeline version is not found. + """ + return self._pipelines_api.get_pipeline_version(version_id=version_id) + def delete_pipeline_version(self, version_id: str): """Deletes a pipeline version. 
From 88e4066ba35e81e0af04c490a9a6368132fe4ee6 Mon Sep 17 00:00:00 2001 From: Googler <nobody@google.com> Date: Thu, 14 Apr 2022 15:09:15 -0700 Subject: [PATCH 5/7] Expose `eval_frequency_secs` and `eval_steps` as separate inputs in the built-in algorithm HPT component PiperOrigin-RevId: 441865531 --- ...n_algorithm_hyperparameter_tuning_job.yaml | 12 ++++++ ...et_hyperparameter_tuning_job_pipeline.json | 38 ++++++++++++++++++- .../automl/tabular/tabnet_trainer.yaml | 7 ++-- .../experimental/automl/tabular/utils.py | 22 +++++++++-- ...ep_hyperparameter_tuning_job_pipeline.json | 38 ++++++++++++++++++- .../automl/tabular/wide_and_deep_trainer.yaml | 7 ++-- .../automl/tabular/unit/utils_test.py | 4 ++ 7 files changed, 116 insertions(+), 12 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/builtin_algorithm_hyperparameter_tuning_job.yaml b/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/builtin_algorithm_hyperparameter_tuning_job.yaml index 5d048181986f..6b430f12a252 100644 --- a/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/builtin_algorithm_hyperparameter_tuning_job.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/builtin_algorithm_hyperparameter_tuning_job.yaml @@ -28,6 +28,12 @@ description: | weight_column (Optional[str]): The weight column name. enable_profiler (Optional[bool]): Enables profiling and saves a trace during evaluation. seed (Optional[int]): Seed to be used for this run. + eval_steps (Optional[int]): Number of steps (batches) to run evaluation for. If not + specified or negative, it means run evaluation on the whole validation + dataset. If set to 0, it means run evaluation for a fixed number of + samples. + eval_frequency_secs (Optional[int]): Frequency at which evaluation and checkpointing will + take place. study_spec_metrics (list[dict]): Required. 
List of dictionaries representing metrics to optimize. The dictionary contains the metric_id, which is reported by the training @@ -78,6 +84,8 @@ inputs: - { name: weight_column, type: String, default: "" } - { name: enable_profiler, type: Boolean, default: "false" } - { name: seed, type: Integer, default: 1 } + - { name: eval_steps, type: Integer, default: 0 } + - { name: eval_frequency_secs, type: Integer, default: 600 } - { name: study_spec_metrics, type: JsonArray } - { name: study_spec_parameters, type: JsonArray } - { name: max_trial_count, type: Integer } @@ -181,5 +189,9 @@ implementation: { inputValue: enable_profiler }, '", "--seed=', { inputValue: seed }, + '", "--eval_steps=', + { inputValue: eval_steps }, + '", "--eval_frequency_secs=', + { inputValue: eval_frequency_secs }, '"]}}]}}', ]] diff --git a/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.json b/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.json index cfb2ee464a1f..24a4ae84494b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.json +++ b/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/tabnet_hyperparameter_tuning_job_pipeline.json @@ -55,6 +55,12 @@ "encryption_spec_key_name": { "type": "STRING" }, + "eval_frequency_secs": { + "type": "INT" + }, + "eval_steps": { + "type": "INT" + }, "location": { "type": "STRING" }, @@ -325,6 +331,12 @@ "encryption_spec_key_name": { "componentInputParameter": "pipelineparam--encryption_spec_key_name" }, + "eval_frequency_secs": { + "componentInputParameter": "pipelineparam--eval_frequency_secs" + }, + "eval_steps": { + "componentInputParameter": "pipelineparam--eval_steps" + }, "location": { "componentInputParameter": "pipelineparam--location" }, @@ -764,6 +776,12 @@ 
"pipelineparam--encryption_spec_key_name": { "type": "STRING" }, + "pipelineparam--eval_frequency_secs": { + "type": "INT" + }, + "pipelineparam--eval_steps": { + "type": "INT" + }, "pipelineparam--location": { "type": "STRING" }, @@ -1100,7 +1118,7 @@ "--gcp_resources", "{{$.outputs.parameters['gcp_resources'].output_file}}", "--payload", - "{\"display_name\": \"automl-tabular-builtin-algorithm-hyperparameter-tuning-job-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"study_spec\": {\"metrics\": {{$.inputs.parameters['study_spec_metrics']}}, \"parameters\": {{$.inputs.parameters['study_spec_parameters']}}, \"algorithm\": \"{{$.inputs.parameters['study_spec_algorithm']}}\", \"measurement_selection_type\": \"{{$.inputs.parameters['study_spec_measurement_selection_type']}}\"}, \"max_trial_count\": {{$.inputs.parameters['max_trial_count']}}, \"parallel_trial_count\": {{$.inputs.parameters['parallel_trial_count']}}, \"max_failed_trial_count\": {{$.inputs.parameters['max_failed_trial_count']}}, \"trial_job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"{{$.inputs.parameters['replica_count']}}\", \"machine_spec\": {{$.inputs.parameters['machine_spec']}}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:prod\", \"args\": [\"--target_column={{$.inputs.parameters['target_column']}}\", \"--weight_column={{$.inputs.parameters['weight_column']}}\", \"--model_type={{$.inputs.parameters['prediction_type']}}\", \"--prediction_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod\", \"--prediction_docker_uri_artifact_path={{$.outputs.parameters['prediction_docker_uri_output'].output_file}}\", \"--baseline_path={{$.inputs.artifacts['instance_baseline'].uri}}\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--transform_output_path={{$.inputs.artifacts['transform_output'].uri}}\", 
\"--training_schema_path={{$.inputs.artifacts['training_schema_uri'].uri}}\", \"--instance_schema_path={{$.outputs.parameters['instance_schema_uri'].output_file}}\", \"--prediction_schema_path={{$.outputs.parameters['prediction_schema_uri'].output_file}}\", \"--trials_path={{$.outputs.parameters['trials'].output_file}}\", \"--job_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--training_data_path={{$.inputs.artifacts['materialized_train_split'].uri}}\", \"--validation_data_path={{$.inputs.artifacts['materialized_eval_split'].uri}}\", \"--test_data_path={{$.inputs.artifacts['materialized_test_split'].uri}}\", \"--enable_profiler={{$.inputs.parameters['enable_profiler']}}\", \"--seed={{$.inputs.parameters['seed']}}\"]}}]}}" + "{\"display_name\": \"automl-tabular-builtin-algorithm-hyperparameter-tuning-job-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"study_spec\": {\"metrics\": {{$.inputs.parameters['study_spec_metrics']}}, \"parameters\": {{$.inputs.parameters['study_spec_parameters']}}, \"algorithm\": \"{{$.inputs.parameters['study_spec_algorithm']}}\", \"measurement_selection_type\": \"{{$.inputs.parameters['study_spec_measurement_selection_type']}}\"}, \"max_trial_count\": {{$.inputs.parameters['max_trial_count']}}, \"parallel_trial_count\": {{$.inputs.parameters['parallel_trial_count']}}, \"max_failed_trial_count\": {{$.inputs.parameters['max_failed_trial_count']}}, \"trial_job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"{{$.inputs.parameters['replica_count']}}\", \"machine_spec\": {{$.inputs.parameters['machine_spec']}}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/tabnet-training:prod\", \"args\": [\"--target_column={{$.inputs.parameters['target_column']}}\", \"--weight_column={{$.inputs.parameters['weight_column']}}\", 
\"--model_type={{$.inputs.parameters['prediction_type']}}\", \"--prediction_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod\", \"--prediction_docker_uri_artifact_path={{$.outputs.parameters['prediction_docker_uri_output'].output_file}}\", \"--baseline_path={{$.inputs.artifacts['instance_baseline'].uri}}\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--transform_output_path={{$.inputs.artifacts['transform_output'].uri}}\", \"--training_schema_path={{$.inputs.artifacts['training_schema_uri'].uri}}\", \"--instance_schema_path={{$.outputs.parameters['instance_schema_uri'].output_file}}\", \"--prediction_schema_path={{$.outputs.parameters['prediction_schema_uri'].output_file}}\", \"--trials_path={{$.outputs.parameters['trials'].output_file}}\", \"--job_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--training_data_path={{$.inputs.artifacts['materialized_train_split'].uri}}\", \"--validation_data_path={{$.inputs.artifacts['materialized_eval_split'].uri}}\", \"--test_data_path={{$.inputs.artifacts['materialized_test_split'].uri}}\", \"--enable_profiler={{$.inputs.parameters['enable_profiler']}}\", \"--seed={{$.inputs.parameters['seed']}}\", \"--eval_steps={{$.inputs.parameters['eval_steps']}}\", \"--eval_frequency_secs={{$.inputs.parameters['eval_frequency_secs']}}\"]}}]}}" ], "command": [ "python3", @@ -1288,6 +1306,12 @@ "pipelineparam--encryption_spec_key_name": { "componentInputParameter": "encryption_spec_key_name" }, + "pipelineparam--eval_frequency_secs": { + "componentInputParameter": "eval_frequency_secs" + }, + "pipelineparam--eval_steps": { + "componentInputParameter": "eval_steps" + }, "pipelineparam--location": { "componentInputParameter": "location" }, @@ -1388,6 +1412,12 @@ "encryption_spec_key_name": { "type": "STRING" }, + "eval_frequency_secs": { + "type": "INT" + }, + "eval_steps": { + "type": "INT" + }, "location": { "type": "STRING" }, @@ -1483,6 
+1513,12 @@ "encryption_spec_key_name": { "stringValue": "" }, + "eval_frequency_secs": { + "intValue": "600" + }, + "eval_steps": { + "intValue": "0" + }, "max_failed_trial_count": { "intValue": "0" }, diff --git a/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/tabnet_trainer.yaml b/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/tabnet_trainer.yaml index 5cfb6b6603c7..895eb22b3498 100644 --- a/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/tabnet_trainer.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/tabnet_trainer.yaml @@ -71,9 +71,10 @@ description: | gamma_focal_loss (Optional[float]): Gamma value (modulating factor) for focal loss for focal loss. enable_profiler (Optional[bool]): Enables profiling and saves a trace during evaluation. seed (Optional[int]): Seed to be used for this run. - eval_steps (Optional[int]): ANumber of steps (batches) to run evaluation for. If not - specified, it means run evaluation on the whole validation dataset. This - value must be >= 1. + eval_steps (Optional[int]): Number of steps (batches) to run evaluation for. If not + specified or negative, it means run evaluation on the whole validation + dataset. If set to 0, it means run evaluation for a fixed number of + samples. batch_size (Optional[int]): Batch size for training. eval_frequency_secs (Optional[int]): Frequency at which evaluation and checkpointing will take place. 
diff --git a/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/utils.py b/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/utils.py index d6013d30ebdc..9c3f6c2ca442 100644 --- a/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/utils.py +++ b/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/utils.py @@ -666,8 +666,9 @@ def get_wide_and_deep_trainer_pipeline_and_parameters( enable_profiler: Enables profiling and saves a trace during evaluation. seed: Seed to be used for this run. eval_steps: Number of steps (batches) to run evaluation for. If not - specified, it means run evaluation on the whole validation dataset. This - value must be >= 1. + specified or negative, it means run evaluation on the whole validation + dataset. If set to 0, it means run evaluation for a fixed number of + samples. batch_size: Batch size for training. eval_frequency_secs: Frequency at which evaluation and checkpointing will take place. @@ -817,6 +818,8 @@ def get_builtin_algorithm_hyperparameter_tuning_job_pipeline_and_parameters( wide_and_deep: bool = False, enable_profiler: bool = False, seed: int = 1, + eval_steps: int = 0, + eval_frequency_secs: int = 600, weight_column: str = '', max_failed_trial_count: int = 0, study_spec_algorithm: str = 'ALGORITHM_UNSPECIFIED', @@ -858,6 +861,12 @@ def get_builtin_algorithm_hyperparameter_tuning_job_pipeline_and_parameters( wide_and_deep: Train Wide & Deep model. enable_profiler: Enables profiling and saves a trace during evaluation. seed: Seed to be used for this run. + eval_steps: Number of steps (batches) to run evaluation for. If not + specified or negative, it means run evaluation on the whole validation + dataset. If set to 0, it means run evaluation for a fixed number of + samples. + eval_frequency_secs: Frequency at which evaluation and checkpointing will + take place. 
weight_column: The weight column name. max_failed_trial_count: The number of failed trials that need to be seen before failing the HyperparameterTuningJob. If set to 0, Vertex AI decides @@ -931,6 +940,10 @@ def get_builtin_algorithm_hyperparameter_tuning_job_pipeline_and_parameters( enable_profiler, 'seed': seed, + 'eval_steps': + eval_steps, + 'eval_frequency_secs': + eval_frequency_secs, 'weight_column': weight_column, 'max_failed_trial_count': @@ -1094,8 +1107,9 @@ def get_tabnet_trainer_pipeline_and_parameters( enable_profiler: Enables profiling and saves a trace during evaluation. seed: Seed to be used for this run. eval_steps: Number of steps (batches) to run evaluation for. If not - specified, it means run evaluation on the whole validation dataset. This - value must be >= 1. + specified or negative, it means run evaluation on the whole validation + dataset. If set to 0, it means run evaluation for a fixed number of + samples. batch_size: Batch size for training. eval_frequency_secs: Frequency at which evaluation and checkpointing will take place. 
diff --git a/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.json b/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.json index 2bece426453b..2a032182cdac 100644 --- a/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.json +++ b/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/wide_and_deep_hyperparameter_tuning_job_pipeline.json @@ -55,6 +55,12 @@ "encryption_spec_key_name": { "type": "STRING" }, + "eval_frequency_secs": { + "type": "INT" + }, + "eval_steps": { + "type": "INT" + }, "location": { "type": "STRING" }, @@ -325,6 +331,12 @@ "encryption_spec_key_name": { "componentInputParameter": "pipelineparam--encryption_spec_key_name" }, + "eval_frequency_secs": { + "componentInputParameter": "pipelineparam--eval_frequency_secs" + }, + "eval_steps": { + "componentInputParameter": "pipelineparam--eval_steps" + }, "location": { "componentInputParameter": "pipelineparam--location" }, @@ -764,6 +776,12 @@ "pipelineparam--encryption_spec_key_name": { "type": "STRING" }, + "pipelineparam--eval_frequency_secs": { + "type": "INT" + }, + "pipelineparam--eval_steps": { + "type": "INT" + }, "pipelineparam--location": { "type": "STRING" }, @@ -1100,7 +1118,7 @@ "--gcp_resources", "{{$.outputs.parameters['gcp_resources'].output_file}}", "--payload", - "{\"display_name\": \"automl-tabular-builtin-algorithm-hyperparameter-tuning-job-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"study_spec\": {\"metrics\": {{$.inputs.parameters['study_spec_metrics']}}, \"parameters\": {{$.inputs.parameters['study_spec_parameters']}}, \"algorithm\": \"{{$.inputs.parameters['study_spec_algorithm']}}\", 
\"measurement_selection_type\": \"{{$.inputs.parameters['study_spec_measurement_selection_type']}}\"}, \"max_trial_count\": {{$.inputs.parameters['max_trial_count']}}, \"parallel_trial_count\": {{$.inputs.parameters['parallel_trial_count']}}, \"max_failed_trial_count\": {{$.inputs.parameters['max_failed_trial_count']}}, \"trial_job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"{{$.inputs.parameters['replica_count']}}\", \"machine_spec\": {{$.inputs.parameters['machine_spec']}}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:prod\", \"args\": [\"--target_column={{$.inputs.parameters['target_column']}}\", \"--weight_column={{$.inputs.parameters['weight_column']}}\", \"--model_type={{$.inputs.parameters['prediction_type']}}\", \"--prediction_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod\", \"--prediction_docker_uri_artifact_path={{$.outputs.parameters['prediction_docker_uri_output'].output_file}}\", \"--baseline_path={{$.inputs.artifacts['instance_baseline'].uri}}\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--transform_output_path={{$.inputs.artifacts['transform_output'].uri}}\", \"--training_schema_path={{$.inputs.artifacts['training_schema_uri'].uri}}\", \"--instance_schema_path={{$.outputs.parameters['instance_schema_uri'].output_file}}\", \"--prediction_schema_path={{$.outputs.parameters['prediction_schema_uri'].output_file}}\", \"--trials_path={{$.outputs.parameters['trials'].output_file}}\", \"--job_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--training_data_path={{$.inputs.artifacts['materialized_train_split'].uri}}\", \"--validation_data_path={{$.inputs.artifacts['materialized_eval_split'].uri}}\", \"--test_data_path={{$.inputs.artifacts['materialized_test_split'].uri}}\", \"--enable_profiler={{$.inputs.parameters['enable_profiler']}}\", 
\"--seed={{$.inputs.parameters['seed']}}\"]}}]}}" + "{\"display_name\": \"automl-tabular-builtin-algorithm-hyperparameter-tuning-job-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", \"encryption_spec\": {\"kms_key_name\":\"{{$.inputs.parameters['encryption_spec_key_name']}}\"}, \"study_spec\": {\"metrics\": {{$.inputs.parameters['study_spec_metrics']}}, \"parameters\": {{$.inputs.parameters['study_spec_parameters']}}, \"algorithm\": \"{{$.inputs.parameters['study_spec_algorithm']}}\", \"measurement_selection_type\": \"{{$.inputs.parameters['study_spec_measurement_selection_type']}}\"}, \"max_trial_count\": {{$.inputs.parameters['max_trial_count']}}, \"parallel_trial_count\": {{$.inputs.parameters['parallel_trial_count']}}, \"max_failed_trial_count\": {{$.inputs.parameters['max_failed_trial_count']}}, \"trial_job_spec\": {\"worker_pool_specs\": [{\"replica_count\":\"{{$.inputs.parameters['replica_count']}}\", \"machine_spec\": {{$.inputs.parameters['machine_spec']}}, \"container_spec\": {\"image_uri\":\"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/wide-and-deep-training:prod\", \"args\": [\"--target_column={{$.inputs.parameters['target_column']}}\", \"--weight_column={{$.inputs.parameters['weight_column']}}\", \"--model_type={{$.inputs.parameters['prediction_type']}}\", \"--prediction_docker_uri=us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:prod\", \"--prediction_docker_uri_artifact_path={{$.outputs.parameters['prediction_docker_uri_output'].output_file}}\", \"--baseline_path={{$.inputs.artifacts['instance_baseline'].uri}}\", \"--metadata_path={{$.inputs.artifacts['metadata'].uri}}\", \"--transform_output_path={{$.inputs.artifacts['transform_output'].uri}}\", \"--training_schema_path={{$.inputs.artifacts['training_schema_uri'].uri}}\", \"--instance_schema_path={{$.outputs.parameters['instance_schema_uri'].output_file}}\", \"--prediction_schema_path={{$.outputs.parameters['prediction_schema_uri'].output_file}}\", 
\"--trials_path={{$.outputs.parameters['trials'].output_file}}\", \"--job_dir={{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", \"--training_data_path={{$.inputs.artifacts['materialized_train_split'].uri}}\", \"--validation_data_path={{$.inputs.artifacts['materialized_eval_split'].uri}}\", \"--test_data_path={{$.inputs.artifacts['materialized_test_split'].uri}}\", \"--enable_profiler={{$.inputs.parameters['enable_profiler']}}\", \"--seed={{$.inputs.parameters['seed']}}\", \"--eval_steps={{$.inputs.parameters['eval_steps']}}\", \"--eval_frequency_secs={{$.inputs.parameters['eval_frequency_secs']}}\"]}}]}}" ], "command": [ "python3", @@ -1288,6 +1306,12 @@ "pipelineparam--encryption_spec_key_name": { "componentInputParameter": "encryption_spec_key_name" }, + "pipelineparam--eval_frequency_secs": { + "componentInputParameter": "eval_frequency_secs" + }, + "pipelineparam--eval_steps": { + "componentInputParameter": "eval_steps" + }, "pipelineparam--location": { "componentInputParameter": "location" }, @@ -1388,6 +1412,12 @@ "encryption_spec_key_name": { "type": "STRING" }, + "eval_frequency_secs": { + "type": "INT" + }, + "eval_steps": { + "type": "INT" + }, "location": { "type": "STRING" }, @@ -1483,6 +1513,12 @@ "encryption_spec_key_name": { "stringValue": "" }, + "eval_frequency_secs": { + "intValue": "600" + }, + "eval_steps": { + "intValue": "0" + }, "max_failed_trial_count": { "intValue": "0" }, diff --git a/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/wide_and_deep_trainer.yaml b/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/wide_and_deep_trainer.yaml index 55dff2ae630c..3b3461ef6bef 100644 --- a/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/wide_and_deep_trainer.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/experimental/automl/tabular/wide_and_deep_trainer.yaml @@ -62,9 +62,10 @@ 
description: | dnn_beta_2 (Optional[float]): Beta 2 value for dnn_optimizer_type="adam". enable_profiler (Optional[bool]): Enables profiling and saves a trace during evaluation. seed (Optional[int]): Seed to be used for this run. - eval_steps (Optional[int]): ANumber of steps (batches) to run evaluation for. If not - specified, it means run evaluation on the whole validation dataset. This - value must be >= 1. + eval_steps (Optional[int]): Number of steps (batches) to run evaluation for. If not + specified or negative, it means run evaluation on the whole validation + dataset. If set to 0, it means run evaluation for a fixed number of + samples. batch_size (Optional[int]): Batch size for training. eval_frequency_secs (Optional[int]): Frequency at which evaluation and checkpointing will take place. diff --git a/components/google-cloud/tests/experimental/automl/tabular/unit/utils_test.py b/components/google-cloud/tests/experimental/automl/tabular/unit/utils_test.py index ffdea5d18c88..583a5976b26b 100644 --- a/components/google-cloud/tests/experimental/automl/tabular/unit/utils_test.py +++ b/components/google-cloud/tests/experimental/automl/tabular/unit/utils_test.py @@ -469,6 +469,10 @@ def test_get_builtin_algorithm_hyperparameter_tuning_job_pipeline_and_parameters False, 'seed': 1, + 'eval_steps': + 0, + 'eval_frequency_secs': + 600, 'weight_column': '', 'max_failed_trial_count': From d452261eb2125e0fa16184c7ca4e9be54cc41045 Mon Sep 17 00:00:00 2001 From: Kevin Naughton <kevinb@google.com> Date: Thu, 14 Apr 2022 15:54:03 -0700 Subject: [PATCH 6/7] Update evaluation yaml to v0.2 PiperOrigin-RevId: 441875726 --- .../experimental/evaluation/component.yaml | 92 +++++++++++++------ 1 file changed, 65 insertions(+), 27 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/experimental/evaluation/component.yaml b/components/google-cloud/google_cloud_pipeline_components/experimental/evaluation/component.yaml index a92197de56c0..9e7f728597d9 
100644 --- a/components/google-cloud/google_cloud_pipeline_components/experimental/evaluation/component.yaml +++ b/components/google-cloud/google_cloud_pipeline_components/experimental/evaluation/component.yaml @@ -25,28 +25,42 @@ description: | run. For prediction results, the files should be in format "prediction.results-*". For explanation results, the files should be in format "explanation.results-*". - classification_type (str): - Required for a `classification` problem_type. The type of classification problem. + ground_truth_format (Optional[str]): + Unstructured data classification. The file format for the ground truth files. + `jsonl` is currently the only allowed format. + If not set, defaulted to `jsonl`. + ground_truth_gcs_source (Optional[Sequence[str]]): + Unstructured data classification. + The GCS uris representing where the ground truth is located. + Used to provide ground truth for each prediction instance when it is not part of the batch prediction job's prediction instances. + key_columns (Optional[Sequence[str]]): + Unstructured data classification. + The list of fields in the ground truth gcs source to format the joining key. + Used to merge prediction instances with ground truth data. + classification_type (Optional[str]): + Required only for a `classification` problem_type. The type of classification problem. Defined as `multiclass` or `multilabel`. - class_names (Sequence[str]): + If not set, defaulted to `multiclass` internally. + class_names (Optional[Sequence[str]]): The list of class names for the ground_truth_column, in the same order they appear in the batch predictions jobs predictions output file.
- For instance, if the groun_truth_column could be either `1` or `0`, and the batch - prediction jobs predictions output contains ["1", "0"] for the prediction_score_column, + For instance, if the ground_truth_column could be either `1` or `0`, and the batch + prediction jobs predictions output contains ["1", "0"] for the prediction_label_column, then the class_names input will be ["1", "0"]. + If not set, defaulted to the classes found in the prediction_label_column in the batch prediction jobs predictions file. ground_truth_column (str): The column name of the feature containing ground truth. Formatted to be able to find nested columns, delimeted by `.`. Prefixed with 'instance.' internally for Vertex Batch Prediction. - prediction_score_column (str): + prediction_score_column (Optional[str]): The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, delimeted by `.`. - If not set, defaulted to `prediction.scores`. + If not set, defaulted to `prediction.scores` for a `classification` problem_type, + or `prediction.value` for a `regression` problem_type. prediction_label_column (Optional[str]): Optional. The column name of the field containing classes the model is scoring. Formatted to be able to find nested columns, delimeted by `.`. - If not set, defaulted to `prediction.classes`. + If not set, defaulted to `prediction.classes` for classification. prediction_id_column (Optional[str]): Optional. The column name of the field containing ids for classes the model is scoring. Formatted to be able to find nested columns, delimeted by `.`. @@ -78,8 +92,15 @@ description: | If not set, defaulted to `10`. dataflow_max_workers_num (Optional[int]): Optional. The max number of workers executing the evaluation run. - If not set, defaulted to `100`. - + If not set, defaulted to `25`. + dataflow_subnetwork (Optional[str]): + Dataflow's fully qualified subnetwork name. When empty, the default subnetwork will be + used.
More details: + https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications + dataflow_use_public_ips (Optional[bool]): + Specifies whether Dataflow workers use public IP addresses. + encryption_spec_key_name (Optional[str]): + Customer-managed encryption key. Returns: evaluation_metrics (system.Metrics): System metrics artifact representing the evaluation metrics in GCS. @@ -91,26 +112,32 @@ inputs: - {name: problem_type, type: String} - {name: predictions_format, type: String, default: 'jsonl'} - {name: batch_prediction_job, type: google.VertexBatchPredictionJob} -- {name: classification_type, type: String, optional: true} -- {name: class_names, type: JsonArray} +- {name: ground_truth_format, type: String, default: 'jsonl'} +- {name: ground_truth_gcs_source, type: JsonArray, default: '{}'} +- {name: key_columns, type: JsonArray, default: '{}'} +- {name: classification_type, type: String, default: ''} +- {name: class_names, type: JsonArray, default: '{}'} - {name: ground_truth_column, type: String} -- {name: prediction_score_column, type: String, default: 'prediction.scores'} -- {name: prediction_label_column, type: String, optional: true, default: 'prediction.classes'} -- {name: prediction_id_column, type: String, optional: true, default: ''} -- {name: example_weight_column, type: String, optional: true, default: ''} -- {name: positive_classes, type: JsonArray, optional: true, default: '{}'} -- {name: generate_feature_attribution, type: Boolean, optional: true, default: False} -- {name: dataflow_service_account, type: String, optional: true} +- {name: prediction_score_column, type: String, default: ''} +- {name: prediction_label_column, type: String, default: ''} +- {name: prediction_id_column, type: String, default: ''} +- {name: example_weight_column, type: String, default: ''} +- {name: positive_classes, type: JsonArray, default: '{}'} +- {name: generate_feature_attribution, type: Boolean, default: False} +- 
{name: dataflow_service_account, type: String, default: ''} - {name: dataflow_disk_size, type: Integer, default: 50} - {name: dataflow_machine_type, type: String, default: 'n1-standard-4'} - {name: dataflow_workers_num, type: Integer, default: '10'} -- {name: dataflow_max_workers_num, type: Integer, default: '100'} +- {name: dataflow_max_workers_num, type: Integer, default: '25'} +- {name: dataflow_subnetwork, type: String, default: ""} +- {name: dataflow_use_public_ips, type: Boolean, default: "true"} +- {name: encryption_spec_key_name, type: String, default: ""} outputs: - {name: evaluation_metrics, type: Metrics} - {name: gcp_resources, type: String} implementation: container: - image: gcr.io/ml-pipeline/model-evaluation:v0.1 + image: gcr.io/ml-pipeline/model-evaluation:v0.2 command: - python - /main.py @@ -129,6 +156,14 @@ implementation: - {inputValue: predictions_format} - --batch_prediction_gcs_source - "{{$.inputs.artifacts['batch_prediction_job'].metadata['gcsOutputDirectory']}}" + - --ground_truth_format + - {inputValue: ground_truth_format} + - --ground_truth_gcs_source + - {inputValue: ground_truth_gcs_source} + - --key_prefix_in_prediction_dataset + - 'instance' + - --key_columns + - {inputValue: key_columns} - --root_dir - "{{$.inputs.parameters['root_dir']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}" - --classification_type @@ -151,11 +186,8 @@ implementation: - {inputValue: generate_feature_attribution} - --dataflow_job_prefix - 'evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' - - if: - cond: {isPresent: dataflow_service_account} - then: - - --dataflow_service_account - - {inputValue: dataflow_service_account} + - --dataflow_service_account + - {inputValue: dataflow_service_account} - --dataflow_disk_size - {inputValue: dataflow_disk_size} - --dataflow_machine_type @@ -164,6 +196,12 @@ implementation: - {inputValue: dataflow_workers_num} - --dataflow_max_workers_num - {inputValue: dataflow_max_workers_num} + - 
--dataflow_subnetwork + - {inputValue: dataflow_subnetwork} + - --dataflow_use_public_ips + - {inputValue: dataflow_use_public_ips} + - --kms_key_name + - {inputValue: encryption_spec_key_name} - --output_metrics_gcs_path - {outputUri: evaluation_metrics} - --gcp_resources From a85dc4f5f1f65f14bd807dec9ab25d8dafb34379 Mon Sep 17 00:00:00 2001 From: Googler <nobody@google.com> Date: Thu, 14 Apr 2022 17:54:21 -0700 Subject: [PATCH 7/7] chore(components/google-cloud): Alias create_custom_training_job_op_from_component to create_custom_training_job_from_component PiperOrigin-RevId: 441899934 --- .../v1/custom_job/__init__.py | 7 +++++-- .../v1/custom_job/utils.py | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/__init__.py b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/__init__.py index 6effe6a67e64..414043562ec7 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/__init__.py +++ b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/__init__.py @@ -15,6 +15,10 @@ import os +from .utils import create_custom_training_job_op_from_component +# Aliasing for better readability +create_custom_training_job_from_component = create_custom_training_job_op_from_component + try: from kfp.v2.components import load_component_from_file except ImportError: @@ -23,9 +27,8 @@ __all__ = [ 'CustomTrainingJobOp', 'create_custom_training_job_op_from_component', + 'create_custom_training_job_from_component', ] - CustomTrainingJobOp = load_component_from_file( os.path.join(os.path.dirname(__file__), 'component.yaml')) - diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py index 301d0f405c75..fc5b30b9522b 100644 --- a/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py +++ 
b/components/google-cloud/google_cloud_pipeline_components/v1/custom_job/utils.py @@ -32,6 +32,8 @@ _EXECUTOR_PLACE_HOLDER_REPLACEMENT = '{{$.json_escape[1]}}' +# This method is aliased to "create_custom_training_job_from_component" for +# better readability def create_custom_training_job_op_from_component( component_spec: Callable, # pylint: disable=g-bare-generic display_name: Optional[str] = '',