diff --git a/docs/readthedocs/source/_static/js/chronos_tutorial.js b/docs/readthedocs/source/_static/js/chronos_tutorial.js
index 978b3fd8d31..1c66daf1a24 100644
--- a/docs/readthedocs/source/_static/js/chronos_tutorial.js
+++ b/docs/readthedocs/source/_static/js/chronos_tutorial.js
@@ -51,7 +51,8 @@ $(".checkboxes").click(function(){
     var ids = ["ChronosForecaster","TuneaForecasting","AutoTSEstimator","AutoWIDE",
                "MultvarWIDE","MultstepWIDE","LSTMForecaster","AutoProphet","AnomalyDetection",
                "DeepARmodel","TFTmodel","hyperparameter","taxiDataset","distributedFashion",
-               "ONNX","Quantize","TCMFForecaster","PenalizeUnderestimation"];
+               "ONNX","Quantize","TCMFForecaster","PenalizeUnderestimation",
+               "GPUtrainingCPUacceleration"];
     showTutorials(ids);
     var disIds = ["simulation"];
     disCheck(disIds);
@@ -94,7 +95,7 @@ $(".checkboxes").click(function(){
         disCheck(disIds);
     }
     else if(vals.includes("customized_model")){
-        var ids = ["AutoTSEstimator","DeepARmodel","TFTmodel"];
+        var ids = ["AutoTSEstimator","DeepARmodel","TFTmodel", "GPUtrainingCPUacceleration"];
         showTutorials(ids);
         var disIds = ["anomaly_detection","simulation","onnxruntime","quantization","distributed"];
         disCheck(disIds);
@@ -114,7 +115,7 @@ $(".checkboxes").click(function(){
         disCheck(disIds);
     }
     else if(vals.includes("forecast") && vals.includes("customized_model")){
-        var ids = ["DeepARmodel","TFTmodel","AutoTSEstimator"];
+        var ids = ["DeepARmodel","TFTmodel","AutoTSEstimator","GPUtrainingCPUacceleration"];
        showTutorials(ids);
         var disIds = ["anomaly_detection","simulation","onnxruntime","quantization","distributed"];
         disCheck(disIds);
diff --git a/docs/readthedocs/source/doc/Chronos/QuickStart/index.md b/docs/readthedocs/source/doc/Chronos/QuickStart/index.md
index 8ac4ae56f29..c1aa7bb2219 100644
--- a/docs/readthedocs/source/doc/Chronos/QuickStart/index.md
+++ b/docs/readthedocs/source/doc/Chronos/QuickStart/index.md
@@ -244,6 +244,16 @@
+<details id="GPUtrainingCPUacceleration">
+    <summary>
+        <a href="https://github.com/intel-analytics/BigDL/tree/main/python/chronos/example/inference-acceleration">Accelerate the inference of a model trained on another platform</a>
+        <p>Tag: <button value="customized_model">customized model</button></p>
+    </summary>
+    <a href="https://github.com/intel-analytics/BigDL/tree/main/python/chronos/example/inference-acceleration">View source on GitHub</a>
+    <p>In this example, we show how to train a model on GPU and accelerate its inference with onnxruntime on CPU.</p>
+</details>
+<hr>
+
diff --git a/python/chronos/example/inference-acceleration/cpu_inference_acceleration.py b/python/chronos/example/inference-acceleration/cpu_inference_acceleration.py
new file mode 100644
index 00000000000..3e065aa2087
--- /dev/null
+++ b/python/chronos/example/inference-acceleration/cpu_inference_acceleration.py
@@ -0,0 +1,90 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import torch
+from bigdl.chronos.pytorch import TSTrainer as Trainer
+from bigdl.chronos.model.tcn import model_creator
+from bigdl.chronos.metric.forecast_metrics import Evaluator
+from bigdl.chronos.data.repo_dataset import get_public_dataset
+from sklearn.preprocessing import StandardScaler
+
+def gen_dataloader():
+    # download and split the nyc_taxi public dataset
+    tsdata_train, tsdata_val,\
+        tsdata_test = get_public_dataset(name='nyc_taxi',
+                                         with_split=True,
+                                         val_ratio=0.1,
+                                         test_ratio=0.1)
+
+    # standardize the data, fitting the scaler on the training split only
+    stand = StandardScaler()
+    for tsdata in [tsdata_train, tsdata_val, tsdata_test]:
+        tsdata.deduplicate()\
+              .impute()\
+              .gen_dt_feature()\
+              .scale(stand, fit=tsdata is tsdata_train)\
+              .roll(lookback=48, horizon=1)
+
+    tsdata_traindataloader = tsdata_train.to_torch_data_loader(batch_size=32)
+    tsdata_valdataloader = tsdata_val.to_torch_data_loader(batch_size=32, shuffle=False)
+    tsdata_testdataloader = tsdata_test.to_torch_data_loader(batch_size=32, shuffle=False)
+
+    return tsdata_traindataloader, tsdata_valdataloader, tsdata_testdataloader
+
+def predict_wrapper(model, input_sample):
+    model(input_sample)
+
+if __name__ == '__main__':
+
+    # create data loaders for train/valid/test
+    tsdata_traindataloader,\
+        tsdata_valdataloader,\
+        tsdata_testdataloader = gen_dataloader()
+
+    # create a model
+    # this could be an arbitrary model; we choose the built-in TCN model here
+    config = {'input_feature_num': 8,
+              'output_feature_num': 1,
+              'past_seq_len': 48,
+              'future_seq_len': 1,
+              'kernel_size': 3,
+              'repo_initialization': True,
+              'dropout': 0.1,
+              'seed': 0,
+              'num_channels': [30] * 7
+              }
+    model = model_creator(config)
+    loss = torch.nn.MSELoss()
+    optimizer = torch.optim.Adam(lr=0.001, params=model.parameters())
+    lit_model = Trainer.compile(model, loss, optimizer)
+
+    # train the model
+    # you may use any method to train the model, on either GPU or CPU
+    trainer = Trainer(max_epochs=3,
+                      accelerator='gpu',
+                      devices=1,
+                      )
+    trainer.fit(lit_model, tsdata_traindataloader, tsdata_valdataloader)
+
+    # get an input sample from the train data loader
+    x = None
+    for x, _ in tsdata_traindataloader:
+        break
+    input_sample = x[0].unsqueeze(0)
+
+    # speed up the model with onnxruntime using Chronos TSTrainer
+    speed_model = Trainer.trace(lit_model, accelerator="onnxruntime", input_sample=input_sample)
+
+    # compare the latency of the original and the accelerated model
+    print("original pytorch latency (ms):", Evaluator.get_latency(predict_wrapper, lit_model, input_sample))
+    print("onnxruntime latency (ms):", Evaluator.get_latency(predict_wrapper, speed_model, input_sample))
diff --git a/python/chronos/example/inference-acceleration/readme.md b/python/chronos/example/inference-acceleration/readme.md
new file mode 100644
index 00000000000..2fe7848e46e
--- /dev/null
+++ b/python/chronos/example/inference-acceleration/readme.md
@@ -0,0 +1,32 @@
+# Accelerate the inference of a model trained on another platform
+
+## Introduction
+Chronos has many built-in models, wrapped in forecasters, detectors and simulators, that are optimized for the CPU (especially Intel CPU) platform.
+
+However, users may want to use their own model, or a built-in model trained on another platform (e.g. GPU), while still carrying out inference on CPU. Chronos can also help users accelerate such a model for inference.
+
+In this example, we show how to train a model on GPU and accelerate its inference with onnxruntime on CPU.
+
+## How to run this example
+```bash
+python cpu_inference_acceleration.py
+```
+
+## Sample output
+```bash
+Epoch 2: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 288/288
+original pytorch latency (ms): {'p50': 1.236, 'p90': 1.472, 'p95': 1.612, 'p99': 32.989}
+onnxruntime latency (ms): {'p50': 0.124, 'p90': 0.129, 'p95': 0.148, 'p99': 0.363}
+```
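+
+## Key API (sketch)
+The acceleration in this example boils down to a single call, `Trainer.trace`. Below is a minimal sketch, assuming `lit_model` and `input_sample` have already been prepared as in `cpu_inference_acceleration.py`:
+```python
+from bigdl.chronos.pytorch import TSTrainer as Trainer
+
+# lit_model and input_sample are assumed to be prepared as in cpu_inference_acceleration.py
+speed_model = Trainer.trace(lit_model, accelerator="onnxruntime", input_sample=input_sample)
+
+# the accelerated model is called in the same way as the original pytorch model
+prediction = speed_model(input_sample)
+```
\ No newline at end of file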