many models and HTS cli (#2505)

Azure · Jul 27, 2023 · f3b8a39 · f3b8a39
1 parent 5ee8561
commit f3b8a39
Show file tree

Hide file tree

Showing 12 changed files with 329 additions and 0 deletions.
diff --git a/...ponents-cli-automl-forecasting-demand-hierarchical-timeseries-hts_evaluation_pipeline.yml b/...ponents-cli-automl-forecasting-demand-hierarchical-timeseries-hts_evaluation_pipeline.yml
@@ -0,0 +1,51 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: cli-jobs-pipelines-automl-cli-automl-forecasting-demand-with-pipeline-components-cli-automl-forecasting-demand-hierarchical-timeseries-hts_evaluation_pipeline
+on:  # three triggers: manual dispatch, cron schedule, pull requests
+  workflow_dispatch:
+  schedule:
+    - cron: "55 9/12 * * *"  # minute 55, hour field 9/12 - presumably 09:55 and 21:55 UTC; confirm step syntax
+  pull_request:
+    branches:
+      - main
+    paths:  # pull requests trigger the workflow only when one of these paths changes
+      - cli/jobs/pipelines/automl/cli-automl-forecasting-demand-with-pipeline-components/cli-automl-forecasting-demand-hierarchical-timeseries/**
+      - infra/bootstrapping/**
+      - .github/workflows/cli-jobs-pipelines-automl-cli-automl-forecasting-demand-with-pipeline-components-cli-automl-forecasting-demand-hierarchical-timeseries-hts_evaluation_pipeline.yml
+      - cli/run-pipeline-jobs.sh  # NOTE(review): the "run job" step invokes run-job.sh, not this script - confirm intended
+      - cli/setup.sh
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}  # keyed by PR number, else by ref
+  cancel-in-progress: true  # a newer run in the same group cancels the one in progress
+jobs:
+  build:  # single job: checkout, Azure login, bootstrap, CLI setup, then submit the pipeline
+    runs-on: ubuntu-latest
+    steps:
+    - name: check out repo
+      uses: actions/checkout@v2
+    - name: azure login
+      uses: azure/login@v1
+      with:
+        creds: ${{secrets.AZUREML_CREDENTIALS}}  # credentials come from the repository secret of this name
+    - name: bootstrap resources
+      run: |
+          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
+          bash bootstrap.sh
+      working-directory: infra/bootstrapping
+      continue-on-error: false  # a bootstrap failure fails the job
+    - name: setup-cli
+      run: |
+          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
+          bash setup.sh
+      working-directory: cli
+      continue-on-error: true  # NOTE(review): a setup.sh failure does not stop the job; "run job" still executes
+    - name: run job
+      run: |
+          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
+          bash -x ../../../../../run-job.sh hts_evaluation_pipeline.yml
+      working-directory: cli/jobs/pipelines/automl/cli-automl-forecasting-demand-with-pipeline-components/cli-automl-forecasting-demand-hierarchical-timeseries  # five levels below cli/, matching the ../../../../../ prefix above
diff --git a/...-components-cli-automl-forecasting-demand-many-models-many_models_evaluation_pipeline.yml b/...-components-cli-automl-forecasting-demand-many-models-many_models_evaluation_pipeline.yml
@@ -0,0 +1,51 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: cli-jobs-pipelines-automl-cli-automl-forecasting-demand-with-pipeline-components-cli-automl-forecasting-demand-many-models-many_models_evaluation_pipeline
+on:  # three triggers: manual dispatch, cron schedule, pull requests
+  workflow_dispatch:
+  schedule:
+    - cron: "23 6/12 * * *"  # minute 23, hour field 6/12 - presumably 06:23 and 18:23 UTC; confirm step syntax
+  pull_request:
+    branches:
+      - main
+    paths:  # pull requests trigger the workflow only when one of these paths changes
+      - cli/jobs/pipelines/automl/cli-automl-forecasting-demand-with-pipeline-components/cli-automl-forecasting-demand-many-models/**
+      - infra/bootstrapping/**
+      - .github/workflows/cli-jobs-pipelines-automl-cli-automl-forecasting-demand-with-pipeline-components-cli-automl-forecasting-demand-many-models-many_models_evaluation_pipeline.yml
+      - cli/run-pipeline-jobs.sh  # NOTE(review): the "run job" step invokes run-job.sh, not this script - confirm intended
+      - cli/setup.sh
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}  # keyed by PR number, else by ref
+  cancel-in-progress: true  # a newer run in the same group cancels the one in progress
+jobs:
+  build:  # single job: checkout, Azure login, bootstrap, CLI setup, then submit the pipeline
+    runs-on: ubuntu-latest
+    steps:
+    - name: check out repo
+      uses: actions/checkout@v2
+    - name: azure login
+      uses: azure/login@v1
+      with:
+        creds: ${{secrets.AZUREML_CREDENTIALS}}  # credentials come from the repository secret of this name
+    - name: bootstrap resources
+      run: |
+          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
+          bash bootstrap.sh
+      working-directory: infra/bootstrapping
+      continue-on-error: false  # a bootstrap failure fails the job
+    - name: setup-cli
+      run: |
+          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
+          bash setup.sh
+      working-directory: cli
+      continue-on-error: true  # NOTE(review): a setup.sh failure does not stop the job; "run job" still executes
+    - name: run job
+      run: |
+          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
+          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
+          bash -x ../../../../../run-job.sh many_models_evaluation_pipeline.yml
+      working-directory: cli/jobs/pipelines/automl/cli-automl-forecasting-demand-with-pipeline-components/cli-automl-forecasting-demand-many-models  # five levels below cli/, matching the ../../../../../ prefix above
diff --git a/cli/README.md b/cli/README.md
@@ -130,6 +130,8 @@ path|status|description
 [jobs/basics/hello-world-output.yml](jobs/basics/hello-world-output.yml)|[![jobs/basics/hello-world-output](https://github.com/Azure/azureml-examples/workflows/cli-jobs-basics-hello-world-output/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-basics-hello-world-output.yml)|*no description*
 [jobs/basics/hello-world.yml](jobs/basics/hello-world.yml)|[![jobs/basics/hello-world](https://github.com/Azure/azureml-examples/workflows/cli-jobs-basics-hello-world/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-basics-hello-world.yml)|*no description*
 [jobs/pipelines/automl/cli-automl-classification-task-bankmarketing-pipeline/pipeline.yml](jobs/pipelines/automl/cli-automl-classification-task-bankmarketing-pipeline/pipeline.yml)|[![jobs/pipelines/automl/cli-automl-classification-task-bankmarketing-pipeline/pipeline](https://github.com/Azure/azureml-examples/workflows/cli-jobs-pipelines-automl-cli-automl-classification-task-bankmarketing-pipeline-pipeline/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-pipelines-automl-cli-automl-classification-task-bankmarketing-pipeline-pipeline.yml)|Pipeline using AutoML classification task
+[jobs/pipelines/automl/cli-automl-forecasting-demand-with-pipeline-components/cli-automl-forecasting-demand-hierarchical-timeseries/hts_evaluation_pipeline.yml](jobs/pipelines/automl/cli-automl-forecasting-demand-with-pipeline-components/cli-automl-forecasting-demand-hierarchical-timeseries/hts_evaluation_pipeline.yml)|[![jobs/pipelines/automl/cli-automl-forecasting-demand-with-pipeline-components/cli-automl-forecasting-demand-hierarchical-timeseries/hts_evaluation_pipeline](https://github.com/Azure/azureml-examples/workflows/cli-jobs-pipelines-automl-cli-automl-forecasting-demand-with-pipeline-components-cli-automl-forecasting-demand-hierarchical-timeseries-hts_evaluation_pipeline/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-pipelines-automl-cli-automl-forecasting-demand-with-pipeline-components-cli-automl-forecasting-demand-hierarchical-timeseries-hts_evaluation_pipeline.yml)|HTS evaluation pipeline using AutoML
+[jobs/pipelines/automl/cli-automl-forecasting-demand-with-pipeline-components/cli-automl-forecasting-demand-many-models/many_models_evaluation_pipeline.yml](jobs/pipelines/automl/cli-automl-forecasting-demand-with-pipeline-components/cli-automl-forecasting-demand-many-models/many_models_evaluation_pipeline.yml)|[![jobs/pipelines/automl/cli-automl-forecasting-demand-with-pipeline-components/cli-automl-forecasting-demand-many-models/many_models_evaluation_pipeline](https://github.com/Azure/azureml-examples/workflows/cli-jobs-pipelines-automl-cli-automl-forecasting-demand-with-pipeline-components-cli-automl-forecasting-demand-many-models-many_models_evaluation_pipeline/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-pipelines-automl-cli-automl-forecasting-demand-with-pipeline-components-cli-automl-forecasting-demand-many-models-many_models_evaluation_pipeline.yml)|Many models evaluation pipeline using AutoML
 [jobs/pipelines/automl/cli-automl-regression-housepricing-pipeline/pipeline.yml](jobs/pipelines/automl/cli-automl-regression-housepricing-pipeline/pipeline.yml)|[![jobs/pipelines/automl/cli-automl-regression-housepricing-pipeline/pipeline](https://github.com/Azure/azureml-examples/workflows/cli-jobs-pipelines-automl-cli-automl-regression-housepricing-pipeline-pipeline/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-pipelines-automl-cli-automl-regression-housepricing-pipeline-pipeline.yml)|Pipeline using AutoML regression task
 [jobs/pipelines/automl/cli-automl-text-classification-multilabel-paper-categorization-pipeline/pipeline.yml](jobs/pipelines/automl/cli-automl-text-classification-multilabel-paper-categorization-pipeline/pipeline.yml)|[![jobs/pipelines/automl/cli-automl-text-classification-multilabel-paper-categorization-pipeline/pipeline](https://github.com/Azure/azureml-examples/workflows/cli-jobs-pipelines-automl-cli-automl-text-classification-multilabel-paper-categorization-pipeline-pipeline/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-pipelines-automl-cli-automl-text-classification-multilabel-paper-categorization-pipeline-pipeline.yml)|Pipeline using AutoML Text Classification Multilabel task
 [jobs/pipelines/automl/cli-automl-text-classification-newsgroup-pipeline/pipeline.yml](jobs/pipelines/automl/cli-automl-text-classification-newsgroup-pipeline/pipeline.yml)|[![jobs/pipelines/automl/cli-automl-text-classification-newsgroup-pipeline/pipeline](https://github.com/Azure/azureml-examples/workflows/cli-jobs-pipelines-automl-cli-automl-text-classification-newsgroup-pipeline-pipeline/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-pipelines-automl-cli-automl-text-classification-newsgroup-pipeline-pipeline.yml)|Pipeline using AutoML Text Classification task

diff --git a/...ting-demand-hierarchical-timeseries/How to Run this AutoML Job with CLI (Forecasting).txt b/...ting-demand-hierarchical-timeseries/How to Run this AutoML Job with CLI (Forecasting).txt
@@ -0,0 +1,9 @@
+How to Run this AutoML Job with CLI (Time-Series Forecasting)
+===============================================================
+
+
+Run the Azure ML CLI command below, passing the .yml file in this folder and your Azure ML IDs:
+----------------------------------------------------------------------------------------------------
+
+
+/> az ml job create --file ./hts_evaluation_pipeline.yml --workspace-name [YOUR_AZURE_WORKSPACE] --resource-group [YOUR_AZURE_RESOURCE_GROUP] --subscription [YOUR_AZURE_SUBSCRIPTION]
diff --git a/...-components/cli-automl-forecasting-demand-hierarchical-timeseries/automl_settings_hts.yml b/...-components/cli-automl-forecasting-demand-hierarchical-timeseries/automl_settings_hts.yml
@@ -0,0 +1,18 @@
+debug_log: debug.txt
+enable_early_stopping: true
+forecast_horizon: 24  # presumably counted in periods of the data's time frequency - confirm
+forecast_step: 24  # same value as the forecast_step input in hts_evaluation_pipeline.yml
+hierarchy_column_names:  # hierarchy levels; presumably ordered from top to bottom - confirm
+- group_id
+- customer_id
+hierarchy_training_level: customer_id  # must be one of hierarchy_column_names above
+label_column_name: usage  # target column
+max_trials: 25
+n_cross_validations: 3
+cv_step_size: 24  # equal to forecast_horizon
+primary_metric: normalized_root_mean_squared_error
+task: forecasting
+time_column_name: datetime
+timeout_minutes: 60  # 3600 s; the pipeline's parallel_step_timeout_in_seconds is 3700
+track_child_runs: false
+trial_timeout_minutes: 5
diff --git a/...ponents/cli-automl-forecasting-demand-hierarchical-timeseries/hts_evaluation_pipeline.yml b/...ponents/cli-automl-forecasting-demand-hierarchical-timeseries/hts_evaluation_pipeline.yml
@@ -0,0 +1,85 @@
+$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
+type: pipeline
+
+description: HTS evaluation pipeline using AutoML
+
+display_name: hts-evaluation-pipeline
+experiment_name: automl-hts-evaluation-pipeline
+
+settings:
+  default_compute: azureml:cpu-cluster  # same cluster name as the compute_name input below
+
+inputs:
+  raw_data:  # consumed by the automl_hts_training job
+    type: uri_folder
+    path: ../data/train
+  inference_data:  # consumed by the automl_hts_inference job as its raw_data
+    type: uri_folder
+    path: ../data/test
+  automl_config:
+    type: uri_file
+    path: automl_settings_hts.yml  # settings file located next to this pipeline definition
+  max_concurrency_per_node: 2  # forwarded to both the training and inference jobs
+  parallel_step_timeout_in_seconds: 3700  # forwarded to both the training and inference jobs
+  max_nodes: 5  # forwarded to both the training and inference jobs
+  forecast_level: "customer_id"  # same column as hierarchy_training_level in automl_settings_hts.yml
+  forecast_mode: "rolling"  # forwarded to the inference job only
+  forecast_step: 24  # forwarded to the inference job only
+  compute_name: "cpu-cluster"  # forwarded to every job in this pipeline
+
+outputs: 
+  forecast_results:  # bound to automl_hts_inference.outputs.evaluation_data
+    type: uri_file
+    mode: upload
+  evaluation_results:  # bound to compute_metrics.outputs.evaluation_result
+    type: uri_folder
+    mode: upload
+
+jobs:
+  automl_hts_training:  # first step; its run_output feeds automl_hts_inference
+    type: command
+    component: azureml://registries/azureml-preview/components/automl_hts_training  # NOTE(review): no version in this reference - presumably resolves to a default/latest version; confirm
+    inputs:
+      raw_data: ${{parent.inputs.raw_data}}
+      automl_config: ${{parent.inputs.automl_config}}
+      max_nodes: ${{parent.inputs.max_nodes}}
+      max_concurrency_per_node: ${{parent.inputs.max_concurrency_per_node}}
+      parallel_step_timeout_in_seconds: ${{parent.inputs.parallel_step_timeout_in_seconds}}
+      compute_name: ${{parent.inputs.compute_name}}
+    outputs:
+      run_output:
+        type: uri_folder
+
+  automl_hts_inference:  # second step; depends on the training step through optional_train_metadata
+    type: command
+    component: azureml://registries/azureml-preview/components/automl_hts_inference  # NOTE(review): no version in this reference - confirm which version is resolved
+    inputs:
+      raw_data: ${{parent.inputs.inference_data}}  # the pipeline's test data, not the training data
+      forecast_level: ${{parent.inputs.forecast_level}}
+      max_concurrency_per_node: ${{parent.inputs.max_concurrency_per_node}}
+      parallel_step_timeout_in_seconds: ${{parent.inputs.parallel_step_timeout_in_seconds}}
+      forecast_mode: ${{parent.inputs.forecast_mode}}
+      forecast_step: ${{parent.inputs.forecast_step}}
+      max_nodes: ${{parent.inputs.max_nodes}}
+      optional_train_metadata: ${{parent.jobs.automl_hts_training.outputs.run_output}}  # output of the training job above
+      compute_name: ${{parent.inputs.compute_name}}
+    outputs:
+      run_output:
+        type: uri_folder
+      evaluation_configs:
+        type: uri_file
+      evaluation_data:
+        type: uri_file
+        path: ${{parent.outputs.forecast_results}}  # exposed as the pipeline-level forecast_results output
+
+  compute_metrics:  # third step; consumes evaluation_data and evaluation_configs from the inference job
+    type: command
+    component: azureml://registries/azureml/components/compute_metrics  # from the azureml registry, unlike the two azureml-preview components above
+    inputs:
+      task: "tabular-forecasting"
+      ground_truth: ${{parent.jobs.automl_hts_inference.outputs.evaluation_data}}  # same file as prediction; presumably evaluation_config selects the columns - confirm
+      prediction: ${{parent.jobs.automl_hts_inference.outputs.evaluation_data}}
+      evaluation_config: ${{parent.jobs.automl_hts_inference.outputs.evaluation_configs}}
+    compute: ${{parent.inputs.compute_name}}  # set as the job's compute rather than passed as a compute_name input
+    outputs:
+      evaluation_result: ${{parent.outputs.evaluation_results}}  # exposed as the pipeline-level evaluation_results output
diff --git a/...toml-forecasting-demand-many-models/How to Run this AutoML Job with CLI (Forecasting).txt b/...toml-forecasting-demand-many-models/How to Run this AutoML Job with CLI (Forecasting).txt
@@ -0,0 +1,9 @@
+How to Run this AutoML Job with CLI (Time-Series Forecasting)
+===============================================================
+
+
+Run the Azure ML CLI command below, passing the .yml file in this folder and your Azure ML IDs:
+----------------------------------------------------------------------------------------------------
+
+
+/> az ml job create --file ./many_models_evaluation_pipeline.yml --workspace-name [YOUR_AZURE_WORKSPACE] --resource-group [YOUR_AZURE_RESOURCE_GROUP] --subscription [YOUR_AZURE_SUBSCRIPTION]
diff --git a/...line-components/cli-automl-forecasting-demand-many-models/automl_settings_many_models.yml b/...line-components/cli-automl-forecasting-demand-many-models/automl_settings_many_models.yml
@@ -0,0 +1,19 @@
+allow_multi_partitions: false
+debug_log: debug.txt
+enable_early_stopping: true
+forecast_horizon: 24  # presumably counted in periods of the data's time frequency - confirm
+forecast_step: 24  # same value as the forecast_step input in many_models_evaluation_pipeline.yml
+label_column_name: usage  # target column
+max_trials: 25
+n_cross_validations: 3
+cv_step_size: 24  # equal to forecast_horizon
+partition_column_names:  # same single column as time_series_id_column_names below
+- customer_id
+primary_metric: normalized_root_mean_squared_error
+task: forecasting
+time_column_name: datetime
+time_series_id_column_names:
+- customer_id
+timeout_minutes: 60  # 3600 s; the pipeline's parallel_step_timeout_in_seconds is 3700
+track_child_runs: false
+trial_timeout_minutes: 5
diff --git a/...-components/cli-automl-forecasting-demand-many-models/many_models_evaluation_pipeline.yml b/...-components/cli-automl-forecasting-demand-many-models/many_models_evaluation_pipeline.yml
@@ -0,0 +1,85 @@
+$schema: https://azuremlschemas.azureedge.net/latest/pipelineJob.schema.json
+type: pipeline
+
+description: Many models evaluation pipeline using AutoML
+
+display_name: mm-evaluation-pipeline
+experiment_name: automl-mm-evaluation-pipeline
+
+settings:
+  default_compute: azureml:cpu-cluster  # same cluster name as the compute_name input below
+
+inputs:
+  raw_data:  # consumed by the automl_mm_training job
+    type: uri_folder
+    path: ../data/train
+  inference_data:  # consumed by the automl_mm_inference job as its raw_data
+    type: uri_folder
+    path: ../data/test
+  automl_config:
+    type: uri_file
+    path: automl_settings_many_models.yml  # settings file located next to this pipeline definition
+  max_concurrency_per_node: 2  # forwarded to both the training and inference jobs
+  parallel_step_timeout_in_seconds: 3700  # forwarded to both the training and inference jobs
+  max_nodes: 5  # forwarded to both the training and inference jobs
+  retrain_failed_model: true  # forwarded to the training job only
+  forecast_mode: "rolling"  # forwarded to the inference job only
+  forecast_step: 24  # forwarded to the inference job only
+  compute_name: "cpu-cluster"  # forwarded to every job in this pipeline
+
+outputs: 
+  forecast_results:  # bound to automl_mm_inference.outputs.evaluation_data
+    type: uri_file
+    mode: upload
+  evaluation_results:  # bound to compute_metrics.outputs.evaluation_result
+    type: uri_folder
+    mode: upload
+
+jobs:
+  automl_mm_training:  # first step; its run_output feeds automl_mm_inference
+    type: command
+    component: azureml://registries/azureml-preview/components/automl_many_models_training  # NOTE(review): no version in this reference - presumably resolves to a default/latest version; confirm
+    inputs:
+      raw_data: ${{parent.inputs.raw_data}}
+      automl_config: ${{parent.inputs.automl_config}}
+      max_nodes: ${{parent.inputs.max_nodes}}
+      max_concurrency_per_node: ${{parent.inputs.max_concurrency_per_node}}
+      parallel_step_timeout_in_seconds: ${{parent.inputs.parallel_step_timeout_in_seconds}}
+      retrain_failed_model: ${{parent.inputs.retrain_failed_model}}
+      compute_name: ${{parent.inputs.compute_name}}
+    outputs:
+      run_output:
+        type: uri_folder
+
+  automl_mm_inference:  # second step; depends on the training step through optional_train_metadata
+    type: command
+    component: azureml://registries/azureml-preview/components/automl_many_models_inference  # NOTE(review): no version in this reference - confirm which version is resolved
+    inputs:
+      raw_data: ${{parent.inputs.inference_data}}  # the pipeline's test data, not the training data
+      max_concurrency_per_node: ${{parent.inputs.max_concurrency_per_node}}
+      parallel_step_timeout_in_seconds: ${{parent.inputs.parallel_step_timeout_in_seconds}}
+      forecast_mode: ${{parent.inputs.forecast_mode}}
+      forecast_step: ${{parent.inputs.forecast_step}}
+      max_nodes: ${{parent.inputs.max_nodes}}
+      optional_train_metadata: ${{parent.jobs.automl_mm_training.outputs.run_output}}  # output of the training job above
+      compute_name: ${{parent.inputs.compute_name}}
+    outputs:
+      run_output:
+        type: uri_folder
+      evaluation_configs:
+        type: uri_file
+      evaluation_data:
+        type: uri_file
+        path: ${{parent.outputs.forecast_results}}  # exposed as the pipeline-level forecast_results output
+
+  compute_metrics:  # third step; consumes evaluation_data and evaluation_configs from the inference job
+    type: command
+    component: azureml://registries/azureml/components/compute_metrics  # from the azureml registry, unlike the two azureml-preview components above
+    inputs:
+      task: "tabular-forecasting"
+      ground_truth: ${{parent.jobs.automl_mm_inference.outputs.evaluation_data}}  # same file as prediction; presumably evaluation_config selects the columns - confirm
+      prediction: ${{parent.jobs.automl_mm_inference.outputs.evaluation_data}}
+      evaluation_config: ${{parent.jobs.automl_mm_inference.outputs.evaluation_configs}}
+    compute: ${{parent.inputs.compute_name}}  # set as the job's compute rather than passed as a compute_name input
+    outputs:
+      evaluation_result: ${{parent.outputs.evaluation_results}}  # exposed as the pipeline-level evaluation_results output
diff --git a/...ng-demand-with-pipeline-components/data/inference/uci_electro_small_cli_inference.parquet b/...ng-demand-with-pipeline-components/data/inference/uci_electro_small_cli_inference.parquet
diff --git a/...-forecasting-demand-with-pipeline-components/data/test/uci_electro_small_cli_test.parquet b/...-forecasting-demand-with-pipeline-components/data/test/uci_electro_small_cli_test.parquet
diff --git a/...orecasting-demand-with-pipeline-components/data/train/uci_electro_small_cli_train.parquet b/...orecasting-demand-with-pipeline-components/data/train/uci_electro_small_cli_train.parquet