Skip to content

Commit

Permalink
Add workflow id to Olive (microsoft#1179)
Browse files Browse the repository at this point in the history
## Describe your changes

The workflow id feature is a prerequisite for the upcoming feature of running
workflows on a remote VM.
By adding this feature:

- Cache dir will become `<cache_dir>/<workflow_id>`
- The Olive config will be automatically saved to the cache dir.
- User can specify `workflow_id` in config file.
- The default workflow_id is `default_workflow`.

## Checklist before requesting a review
- [ ] Add unit tests for this change.
- [ ] Make sure all tests can pass.
- [ ] Update documents if necessary.
- [ ] Lint and apply fixes to your code by running `lintrunner -a`
- [ ] Is this a user-facing change? If yes, give a description of this
change to be included in the release notes.
- [ ] Is this PR including examples changes? If yes, please remember to
update [example
documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md)
in a follow-up PR.

## (Optional) Issue link
  • Loading branch information
xiaoyu-work authored May 31, 2024
1 parent ecb5d58 commit 8dbf3e3
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 5 deletions.
5 changes: 5 additions & 0 deletions docs/source/overview/options.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ to apply from user in the form of a json dictionary. In this document, we docume

The options are organized into following sections:

- [Workflow id](#workflow-id) `workflow_id`
- [Azure ML client](#azure-ml-client) `azureml_client`
- [Input Model Information](#input-model-information) `input_model`
- [Data Information](#data-information) `data_root`
Expand All @@ -14,6 +15,10 @@ The options are organized into following sections:
- [Passes Information](#passes-information) `passes`
- [Engine Information](#engine-information) `engine`

## Workflow ID

You can name the workflow run by specifying the `workflow_id` field in your config file. Olive will save the cache under the `<cache_dir>/<workflow_id>` folder and automatically save the currently running config in that cache folder.

## Azure ML Client

If you will use Azure ML resources and assets, you need to provide your Azure ML client configurations. For example:
Expand Down
6 changes: 6 additions & 0 deletions olive/common/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------

# Workflow id used when the user does not set `workflow_id` in the run config.
# The engine appends it to the cache path, i.e. caches live under
# <cache_dir>/<workflow_id>.
DEFAULT_WORKFLOW_ID = "default_workflow"
12 changes: 11 additions & 1 deletion olive/engine/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import olive.cache as cache_utils
from olive.common.config_utils import validate_config
from olive.common.constants import DEFAULT_WORKFLOW_ID
from olive.common.utils import hash_dict
from olive.engine.config import FAILED_CONFIG, INVALID_CONFIG, PRUNED_CONFIGS
from olive.engine.footprint import Footprint, FootprintNodeMetric
Expand Down Expand Up @@ -44,6 +45,7 @@ class Engine:

def __init__(
self,
workflow_id: str = DEFAULT_WORKFLOW_ID,
search_strategy: Optional[Union[Dict[str, Any], SearchStrategyConfig]] = None,
host: Optional[Union[Dict[str, Any], "SystemConfig"]] = None,
target: Optional[Union[Dict[str, Any], "SystemConfig"]] = None,
Expand Down Expand Up @@ -77,7 +79,7 @@ def __init__(
# default evaluator
self.evaluator_config = validate_config(evaluator, OliveEvaluatorConfig) if evaluator else None

self.cache_dir = cache_dir
self.cache_dir = str(Path(cache_dir) / workflow_id)
self.clean_cache = clean_cache
self.clean_evaluation_cache = clean_evaluation_cache
self.plot_pareto_frontier = plot_pareto_frontier
Expand Down Expand Up @@ -975,6 +977,14 @@ def get_evaluation_json_path(self, model_id: str):
"""Get the path to the evaluation json."""
return self._evaluation_cache_path / f"{model_id}.json"

def save_olive_config(self, olive_config: dict):
    """Write the current run's Olive config to olive_config.json in the cache directory.

    :param olive_config: the run configuration serialized as a plain dict.
    """
    config_path = Path(self.cache_dir) / "olive_config.json"
    # On a fresh run the workflow cache directory may not exist yet.
    config_path.parent.mkdir(parents=True, exist_ok=True)
    with config_path.open("w") as fp:
        json.dump(olive_config, fp, indent=4)
    logger.info("Saved Olive config to %s", config_path)

def _cache_evaluation(self, model_id: str, signal: MetricResult):
"""Cache the evaluation in the cache directory."""
evaluation_json = {
Expand Down
6 changes: 4 additions & 2 deletions olive/workflows/run/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from olive.auto_optimizer import AutoOptimizerConfig
from olive.azureml.azureml_client import AzureMLClientConfig
from olive.common.config_utils import ConfigBase, validate_config
from olive.common.constants import DEFAULT_WORKFLOW_ID
from olive.common.pydantic_v1 import validator
from olive.data.config import DataConfig
from olive.data.container.huggingface_container import HuggingfaceContainer
Expand Down Expand Up @@ -41,12 +42,13 @@ class RunEngineConfig(EngineConfig):
ort_py_log_severity_level: int = 3
log_to_file: bool = False

def create_engine(self, azureml_client_config, workflow_id):
    """Instantiate an Engine from this run config.

    :param azureml_client_config: Azure ML client configuration forwarded to the engine.
    :param workflow_id: name of this workflow run; the engine caches under
        <cache_dir>/<workflow_id>.
    :return: a configured Engine instance.
    """
    # Only pass through the fields that EngineConfig itself declares.
    engine_kwargs = self.dict(include=EngineConfig.__fields__.keys())
    return Engine(
        workflow_id=workflow_id,
        azureml_client_config=azureml_client_config,
        **engine_kwargs,
    )


class RunConfig(ConfigBase):
workflow_id: str = DEFAULT_WORKFLOW_ID
azureml_client: AzureMLClientConfig = None
input_model: ModelConfig
systems: Dict[str, SystemConfig] = None
Expand Down
10 changes: 8 additions & 2 deletions olive/workflows/run/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ def run_engine(package_config: OlivePackageConfig, run_config: RunConfig, data_r

from olive.passes import Pass

workflow_id = run_config.workflow_id
logger.info("Running workflow %s", workflow_id)

# for onnxruntime
# ort_py_log_severity_level: python logging levels
set_ort_logger_severity(run_config.engine.ort_py_log_severity_level)
Expand All @@ -145,7 +148,10 @@ def run_engine(package_config: OlivePackageConfig, run_config: RunConfig, data_r
input_model = run_config.input_model

# Azure ML Client
engine = run_config.engine.create_engine(run_config.azureml_client)
engine = run_config.engine.create_engine(run_config.azureml_client, workflow_id)

olive_config = run_config.to_json()
engine.save_olive_config(olive_config)

# run_config file will be uploaded to AML job
is_azureml_system = (run_config.engine.host is not None and run_config.engine.host.type == SystemType.AzureML) or (
Expand All @@ -155,7 +161,7 @@ def run_engine(package_config: OlivePackageConfig, run_config: RunConfig, data_r
if is_azureml_system:
from olive.systems.azureml.aml_system import AzureMLSystem

AzureMLSystem.olive_config = run_config.to_json()
AzureMLSystem.olive_config = olive_config

auto_optimizer_enabled = (
not run_config.passes
Expand Down

0 comments on commit 8dbf3e3

Please sign in to comment.