From 9e4dad3a75e53579aec5bb7953a1ad307aaaa930 Mon Sep 17 00:00:00 2001 From: Yu Shan Date: Mon, 24 Aug 2020 14:50:57 +0800 Subject: [PATCH 1/2] expose workers_per_node --- .../orca/learn/horovod/pytorch_estimator.py | 13 +++++++++---- pyzoo/zoo/orca/learn/pytorch/estimator.py | 10 +++++++--- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/pyzoo/zoo/examples/orca/learn/horovod/pytorch_estimator.py b/pyzoo/zoo/examples/orca/learn/horovod/pytorch_estimator.py index a9082843782..8473148b8ad 100644 --- a/pyzoo/zoo/examples/orca/learn/horovod/pytorch_estimator.py +++ b/pyzoo/zoo/examples/orca/learn/horovod/pytorch_estimator.py @@ -77,12 +77,13 @@ def validation_data_creator(config): return validation_loader -def train_example(): +def train_example(workers_per_node): estimator = Estimator.from_torch( model=model_creator, optimizer=optimizer_creator, loss=nn.MSELoss, scheduler_creator=scheduler_creator, + workers_per_node=workers_per_node, config={ "lr": 1e-2, # used in optimizer_creator "hidden_size": 1, # used in model_creator @@ -96,7 +97,7 @@ def train_example(): print("validation stats: {}".format(val_stats)) # retrieve the model - model = estimator.estimator.get_model() + model = estimator.get_model() print("trained weight: % .2f, bias: % .2f" % ( model.weight.item(), model.bias.item())) @@ -128,6 +129,10 @@ def train_example(): parser.add_argument("--object_store_memory", type=str, default="4g", help="The memory to store data on local." "You can change it depending on your own cluster setting.") + parser.add_argument("--workers_per_node", type=int, default=1, + help="The number of workers to run on each node") + parser.add_argument("--local_cores", type=int, default=4, + help="The number of cores while running on local mode") args = parser.parse_args() if args.hadoop_conf: @@ -145,9 +150,9 @@ def train_example(): object_store_memory=args.object_store_memory) ray_ctx.init() else: - sc = init_spark_on_local() + sc = init_spark_on_local(cores=args.local_cores) ray_ctx = RayContext( sc=sc, object_store_memory=args.object_store_memory) ray_ctx.init() - train_example() + train_example(workers_per_node=args.workers_per_node) diff --git a/pyzoo/zoo/orca/learn/pytorch/estimator.py b/pyzoo/zoo/orca/learn/pytorch/estimator.py index 0bc5067c5a9..b8b6e20ed2e 100644 --- a/pyzoo/zoo/orca/learn/pytorch/estimator.py +++ b/pyzoo/zoo/orca/learn/pytorch/estimator.py @@ -55,6 +55,7 @@ def from_torch(*, config=None, scheduler_step_freq="batch", use_tqdm=False, + workers_per_node=1, backend="horovod"): if backend == "horovod": return PyTorchHorovodEstimatorWrapper(model_creator=model, @@ -65,7 +66,8 @@ def from_torch(*, initialization_hook=initialization_hook, config=config, scheduler_step_freq=scheduler_step_freq, - use_tqdm=use_tqdm) + use_tqdm=use_tqdm, + workers_per_node=workers_per_node) elif backend == "bigdl": return PytorchSparkEstimatorWrapper(model=model, loss=loss, @@ -87,7 +89,8 @@ def __init__(self, initialization_hook=None, config=None, scheduler_step_freq="batch", - use_tqdm=False): + use_tqdm=False, + workers_per_node=1): from zoo.orca.learn.pytorch.pytorch_horovod_estimator import PyTorchHorovodEstimator self.estimator = PyTorchHorovodEstimator(model_creator=model_creator, optimizer_creator=optimizer_creator, @@ -97,7 +100,8 @@ def __init__(self, initialization_hook=initialization_hook, config=config, scheduler_step_freq=scheduler_step_freq, - use_tqdm=use_tqdm) + use_tqdm=use_tqdm, + workers_per_node=workers_per_node) def fit(self, data, epochs=1, profile=False, reduce_results=True, info=None): """ From 30c1381da10ae8a07e89316eddd82e296a1d5ca3 Mon Sep 17 00:00:00 2001 From: Yu Shan Date: Tue, 25 Aug 2020 10:59:07 +0800 Subject: [PATCH 2/2] remove import ray to fix jenkins random fail --- pyzoo/zoo/automl/regression/xgbregressor_predictor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pyzoo/zoo/automl/regression/xgbregressor_predictor.py b/pyzoo/zoo/automl/regression/xgbregressor_predictor.py index ad25888d6cd..6e7a0294a75 100644 --- a/pyzoo/zoo/automl/regression/xgbregressor_predictor.py +++ b/pyzoo/zoo/automl/regression/xgbregressor_predictor.py @@ -21,7 +21,6 @@ import zipfile import os import shutil -import ray from zoo.automl.search.abstract import * from zoo.automl.search.RayTuneSearchEngine import RayTuneSearchEngine