Skip to content

Commit

Permalink
Add workflow id to Olive (microsoft#1179)
Browse files Browse the repository at this point in the history
## Describe your changes

The workflow id feature is a prerequisite for the upcoming feature of running
workflows on a remote VM.
By adding this feature:

- Cache dir will become `<cache_dir>/<workflow_id>`
- The Olive config will be automatically saved to the cache dir.
- User can specify `workflow_id` in config file.
- The default workflow_id is `default_workflow`.

## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Lint and apply fixes to your code by running `lintrunner -a`
- [ ] Is this a user-facing change? If yes, give a description of this
change to be included in the release notes.
- [ ] Is this PR including examples changes? If yes, please remember to
update [example
documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md)
in a follow-up PR.

## (Optional) Issue link
  • Loading branch information
xiaoyu-work authored May 31, 2024
1 parent ecb5d58 commit 8dbf3e3
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 5 deletions.
5 changes: 5 additions & 0 deletions docs/source/overview/options.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ to apply from user in the form of a json dictionary. In this document, we docume

The options are organized into following sections:

- [Workflow id](#workflow-id) `workflow_id`
- [Azure ML client](#azure-ml-client) `azureml_client`
- [Input Model Information](#input-model-information) `input_model`
- [Data Information](#data-information) `data_root`
Expand All @@ -14,6 +15,10 @@ The options are organized into following sections:
- [Passes Information](#passes-information) `passes`
- [Engine Information](#engine-information) `engine`

## Workflow ID

You can name the workflow run by specifying the `workflow_id` field in your config file. Olive will save the cache under the `<cache_dir>/<workflow_id>` folder and automatically save the currently running config in that cache folder.

## Azure ML Client

If you will use Azure ML resources and assets, you need to provide your Azure ML client configurations. For example:
Expand Down
6 changes: 6 additions & 0 deletions olive/common/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------

# Workflow id used when the user does not set `workflow_id` in the run config.
# The engine appends it to the cache path, i.e. caches live under
# <cache_dir>/<workflow_id>.
DEFAULT_WORKFLOW_ID = "default_workflow"
12 changes: 11 additions & 1 deletion olive/engine/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import olive.cache as cache_utils
from olive.common.config_utils import validate_config
from olive.common.constants import DEFAULT_WORKFLOW_ID
from olive.common.utils import hash_dict
from olive.engine.config import FAILED_CONFIG, INVALID_CONFIG, PRUNED_CONFIGS
from olive.engine.footprint import Footprint, FootprintNodeMetric
Expand Down Expand Up @@ -44,6 +45,7 @@ class Engine:

def __init__(
self,
workflow_id: str = DEFAULT_WORKFLOW_ID,
search_strategy: Optional[Union[Dict[str, Any], SearchStrategyConfig]] = None,
host: Optional[Union[Dict[str, Any], "SystemConfig"]] = None,
target: Optional[Union[Dict[str, Any], "SystemConfig"]] = None,
Expand Down Expand Up @@ -77,7 +79,7 @@ def __init__(
# default evaluator
self.evaluator_config = validate_config(evaluator, OliveEvaluatorConfig) if evaluator else None

self.cache_dir = cache_dir
self.cache_dir = str(Path(cache_dir) / workflow_id)
self.clean_cache = clean_cache
self.clean_evaluation_cache = clean_evaluation_cache
self.plot_pareto_frontier = plot_pareto_frontier
Expand Down Expand Up @@ -975,6 +977,14 @@ def get_evaluation_json_path(self, model_id: str):
"""Get the path to the evaluation json."""
return self._evaluation_cache_path / f"{model_id}.json"

def save_olive_config(self, olive_config: dict):
    """Write the current run's Olive config to olive_config.json in the cache directory.

    :param olive_config: the run configuration serialized as a plain dict.
    """
    config_path = Path(self.cache_dir) / "olive_config.json"
    # On a fresh run the workflow cache directory may not exist yet.
    config_path.parent.mkdir(parents=True, exist_ok=True)
    with config_path.open("w") as fp:
        json.dump(olive_config, fp, indent=4)
    logger.info("Saved Olive config to %s", config_path)

def _cache_evaluation(self, model_id: str, signal: MetricResult):
"""Cache the evaluation in the cache directory."""
evaluation_json = {
Expand Down
6 changes: 4 additions & 2 deletions olive/workflows/run/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from olive.auto_optimizer import AutoOptimizerConfig
from olive.azureml.azureml_client import AzureMLClientConfig
from olive.common.config_utils import ConfigBase, validate_config
from olive.common.constants import DEFAULT_WORKFLOW_ID
from olive.common.pydantic_v1 import validator
from olive.data.config import DataConfig
from olive.data.container.huggingface_container import HuggingfaceContainer
Expand Down Expand Up @@ -41,12 +42,13 @@ class RunEngineConfig(EngineConfig):
ort_py_log_severity_level: int = 3
log_to_file: bool = False

def create_engine(self, azureml_client_config, workflow_id):
    """Instantiate an Engine from this run config.

    :param azureml_client_config: Azure ML client configuration forwarded to the engine.
    :param workflow_id: name of this workflow run; the engine caches under
        <cache_dir>/<workflow_id>.
    :return: a configured Engine instance.
    """
    # Only pass through the fields that EngineConfig itself declares.
    engine_kwargs = self.dict(include=EngineConfig.__fields__.keys())
    return Engine(
        workflow_id=workflow_id,
        azureml_client_config=azureml_client_config,
        **engine_kwargs,
    )


class RunConfig(ConfigBase):
workflow_id: str = DEFAULT_WORKFLOW_ID
azureml_client: AzureMLClientConfig = None
input_model: ModelConfig
systems: Dict[str, SystemConfig] = None
Expand Down
10 changes: 8 additions & 2 deletions olive/workflows/run/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ def run_engine(package_config: OlivePackageConfig, run_config: RunConfig, data_r

from olive.passes import Pass

workflow_id = run_config.workflow_id
logger.info("Running workflow %s", workflow_id)

# for onnxruntime
# ort_py_log_severity_level: python logging levels
set_ort_logger_severity(run_config.engine.ort_py_log_severity_level)
Expand All @@ -145,7 +148,10 @@ def run_engine(package_config: OlivePackageConfig, run_config: RunConfig, data_r
input_model = run_config.input_model

# Azure ML Client
engine = run_config.engine.create_engine(run_config.azureml_client)
engine = run_config.engine.create_engine(run_config.azureml_client, workflow_id)

olive_config = run_config.to_json()
engine.save_olive_config(olive_config)

# run_config file will be uploaded to AML job
is_azureml_system = (run_config.engine.host is not None and run_config.engine.host.type == SystemType.AzureML) or (
Expand All @@ -155,7 +161,7 @@ def run_engine(package_config: OlivePackageConfig, run_config: RunConfig, data_r
if is_azureml_system:
from olive.systems.azureml.aml_system import AzureMLSystem

AzureMLSystem.olive_config = run_config.to_json()
AzureMLSystem.olive_config = olive_config

auto_optimizer_enabled = (
not run_config.passes
Expand Down

0 comments on commit 8dbf3e3

Please sign in to comment.