Skip to content

Commit

Permalink
pytorch doc and example update. (intel-analytics#1560)
Browse files Browse the repository at this point in the history
* pytorch doc

* update example

* remove transformer

* doc and example update

* fix sample_input

* check none

* param update

* check trace
  • Loading branch information
YY-OnCall committed Aug 15, 2019
1 parent 179e8b1 commit 2e38858
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 57 deletions.
Original file line number Diff line number Diff line change
@@ -1,25 +1,22 @@
## Torch ResNet Prediction Example

TorchNet wraps a TorchScript model as a single layer, thus the PyTorch model can be used for
distributed inference. This example illustrates that a PyTorch program, with one line of change,
TorchNet wraps a PyTorch model as an Analytics Zoo module, thus the PyTorch model can be used for
distributed inference. This example illustrates that a PyTorch program, with a few lines of change,
can be executed on Apache Spark.

## Install or download Analytics Zoo
Follow the instructions [here](https://analytics-zoo.github.io/master/#PythonUserGuide/install/) to install analytics-zoo via __pip__ or __download the prebuilt package__.
Follow the instructions [here](https://analytics-zoo.github.io/master/#PythonUserGuide/install/)
to install analytics-zoo via __pip__ or __download the prebuilt package__.

## Model and Data Preparation

1. Prepare the image dataset for inference. Put all the images you want to run prediction on in the same folder.

We use ResNet 18 from torchvision and run inference on some images, e.g. images from ImageNet.

## Run this example after pip install
```bash
python predict.py --image path_to_image_folder
```

__Options:__
* `--image` The path where the images are stored.

## Run this example with prebuilt package
```bash
export SPARK_HOME=the root directory of Spark
Expand Down
15 changes: 15 additions & 0 deletions python/orca/example/torchmodel/inference/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#
# Copyright 2018 Analytics Zoo Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,6 @@ def predict(img_path):
parser.add_option("--image", type=str, dest="img_path",
help="The path where the images are stored, "
"can be either a folder or an image path")
parser.add_option("--model", type=str, dest="model_path",
help="The path of the TensorFlow object detection model")
parser.add_option("--partition_num", type=int, dest="partition_num", default=4,
help="The number of partitions")
(options, args) = parser.parse_args(sys.argv)

sc = init_nncontext("Torch ResNet Prediction Example")
Expand Down
8 changes: 3 additions & 5 deletions python/orca/example/torchmodel/train/Lenet_mnist.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,7 @@ def forward(self, x):


if __name__ == '__main__':
sparkConf = init_spark_conf().setAppName("test_pytorch_lenet").setMaster("local[1]")\
.set('spark.driver.memory', '10g')
sparkConf = init_spark_conf().setAppName("test_pytorch_lenet")
sc = init_nncontext(sparkConf)
spark = SparkSession.builder.config(conf=sparkConf).getOrCreate()

Expand All @@ -70,9 +69,8 @@ def lossFunc(input, target):
return nn.CrossEntropyLoss().forward(input, target.flatten().long())

torch_model = LeNet()
model = TorchNet.from_pytorch(module=torch_model, input_shape=[1, 1, 28, 28])
criterion = TorchCriterion.from_pytorch(loss=lossFunc, input_shape=[1, 10],
sample_label=torch.LongTensor([5]))
model = TorchNet.from_pytorch(torch_model, [1, 1, 28, 28])
criterion = TorchCriterion.from_pytorch(lossFunc, [1, 10], torch.LongTensor([5]))
classifier = NNClassifier(model, criterion, SeqToTensor([1, 28, 28])) \
.setBatchSize(64) \
.setOptimMethod(Adam()) \
Expand Down
61 changes: 25 additions & 36 deletions python/orca/example/torchmodel/train/SimpleTrainingExample.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,66 +13,55 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.loss import BCELoss
from bigdl.nn.criterion import *
from bigdl.nn.layer import *
from bigdl.optim.optimizer import Adam
from pyspark.sql.types import *
from zoo import init_nncontext
from zoo.common.nncontext import *
from zoo.pipeline.api.net.torch_net import TorchNet, TorchIdentityCriterion
from zoo.pipeline.api.net.torch_net import TorchNet
from zoo.pipeline.api.net.torch_criterion import TorchCriterion
from zoo.pipeline.nnframes import *


# create training data as Spark DataFrame
def get_df(sqlContext):
data = sc.parallelize([
((2.0, 1.0), 1.0),
((1.0, 2.0), 0.0),
((2.0, 1.0), 1.0),
((1.0, 2.0), 0.0)])

schema = StructType([
StructField("features", ArrayType(DoubleType(), False), False),
StructField("label", DoubleType(), False)])
df = sqlContext.createDataFrame(data, schema)
return df
from pyspark.ml.linalg import Vectors
from pyspark.sql import SparkSession


# define model with Pytorch
class SimpleTorchModel(nn.Module):
def __init__(self):
super(SimpleTorchModel, self).__init__()
self.dense1 = nn.Linear(2, 4)
self.dense2 = nn.Linear(4, 8)
self.dense3 = nn.Linear(8, 1)
self.dense2 = nn.Linear(4, 1)

def forward(self, x):
x = self.dense1(x)
x = self.dense2(x)
x = F.sigmoid(self.dense3(x))
x = torch.sigmoid(self.dense2(x))
return x

if __name__ == '__main__':
sparkConf = init_spark_conf().setAppName("testNNClassifer").setMaster('local[1]')
sc = init_nncontext(sparkConf)
sqlContext = SQLContext(sc)
df = get_df(sqlContext)
spark = SparkSession \
.builder \
.getOrCreate()

df = spark.createDataFrame(
[(Vectors.dense([2.0, 1.0]), 1.0),
(Vectors.dense([1.0, 2.0]), 0.0),
(Vectors.dense([2.0, 1.0]), 1.0),
(Vectors.dense([1.0, 2.0]), 0.0)],
["features", "label"])

torch_model = SimpleTorchModel()
becloss = BCELoss()
torch_criterion = nn.MSELoss()

az_model = TorchNet.from_pytorch(torch_model, [1, 2])
az_criterion = TorchCriterion.from_pytorch(torch_criterion, [1, 1], [1, 1])

model = TorchNet.from_pytorch(module=torch_model,
input_shape=[1, 2],
lossFunc=becloss.forward,
pred_shape=[1, 1], label_shape=[1, 1])
classifier = NNEstimator(model, TorchIdentityCriterion(), SeqToTensor([2])) \
.setBatchSize(2) \
classifier = NNClassifier(az_model, az_criterion) \
.setBatchSize(4) \
.setOptimMethod(Adam()) \
.setLearningRate(0.1) \
.setMaxEpoch(20)
.setLearningRate(0.01) \
.setMaxEpoch(10)

nnClassifierModel = classifier.fit(df)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,7 @@ def forward(self, x):
def lossFunc(input, target):
return nn.CrossEntropyLoss().forward(input, target.flatten().long())

torchcriterion = TorchCriterion.from_pytorch(loss=lossFunc, input_shape=[1, 2],
sample_label=torch.LongTensor([1]))
torchcriterion = TorchCriterion.from_pytorch(lossFunc, [1, 2], torch.LongTensor([1]))

# prepare training data as Spark DataFrame
image_path = sys.argv[1]
Expand All @@ -75,8 +74,7 @@ def lossFunc(input, target):
# run training and evaluation
featureTransformer = ChainedPreprocessing(
[RowToImageFeature(), ImageCenterCrop(224, 224),
ImageChannelNormalize(0, 0, 0, 255.0, 255.0, 255.0),
ImageChannelNormalize(0.485, 0.456, 0.406, 0.229, 0.224, 0.225),
ImageChannelNormalize(123.0, 117.0, 104.0, 255.0, 255.0, 255.0),
ImageMatToTensor(), ImageFeatureToTensor()])

classifier = NNClassifier(torchnet, torchcriterion, featureTransformer) \
Expand Down

0 comments on commit 2e38858

Please sign in to comment.