pytorch doc and example update. (intel-analytics#1560)

* pytorch doc * update example * remove transformer * doc and example update * fix sample_input * check none * param update * check trace
sgwhat · Aug 15, 2019 · 2e38858 · 2e38858
1 parent 179e8b1
commit 2e38858
Show file tree

Hide file tree

Showing 6 changed files with 50 additions and 57 deletions.
diff --git a/python/orca/example/torchmodel/README.md → ...ca/example/torchmodel/inference/README.md b/python/orca/example/torchmodel/README.md → ...ca/example/torchmodel/inference/README.md
@@ -1,25 +1,22 @@
 ## Torch ResNet Prediction Example
 
-TorchNet wraps a TorchScript model as a single layer, thus the Pytorch model can be used for
-distributed inference. This example illustrates that a PyTorch program, with One line of change,
+TorchNet wraps a PyTorch model as an Analytics Zoo module, so the PyTorch model can be used for
+distributed inference. This example illustrates that a PyTorch program, with only a few lines of change,
 can be executed on Apache Spark.
 
 ## Install or download Analytics Zoo
-Follow the instructions [here](https://analytics-zoo.github.io/master/#PythonUserGuide/install/) to install analytics-zoo via __pip__ or __download the prebuilt package__.
+Follow the instructions [here](https://analytics-zoo.github.io/master/#PythonUserGuide/install/)
+to install analytics-zoo via __pip__ or __download the prebuilt package__.
 
 ## Model and Data Preparation
 
-1. Prepare the image dataset for inference. Put the images to do prediction in the same folder.
-
+We use ResNet 18 from torchvision and run inference on some images, e.g. images from ImageNet.
 
 ## Run this example after pip install
 ```bash
 python predict.py --image path_to_image_folder
 ```
 
-__Options:__
-* `--image` The path where the images are stored. 
-
 ## Run this example with prebuilt package
 ```bash
 export SPARK_HOME=the root directory of Spark

diff --git a/python/orca/example/torchmodel/inference/__init__.py b/python/orca/example/torchmodel/inference/__init__.py
@@ -0,0 +1,15 @@
+#
+# Copyright 2018 Analytics Zoo Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/python/orca/example/torchmodel/predict.py → ...a/example/torchmodel/inference/predict.py b/python/orca/example/torchmodel/predict.py → ...a/example/torchmodel/inference/predict.py
@@ -49,10 +49,6 @@ def predict(img_path):
     parser.add_option("--image", type=str, dest="img_path",
                       help="The path where the images are stored, "
                            "can be either a folder or an image path")
-    parser.add_option("--model", type=str, dest="model_path",
-                      help="The path of the TensorFlow object detection model")
-    parser.add_option("--partition_num", type=int, dest="partition_num", default=4,
-                      help="The number of partitions")
     (options, args) = parser.parse_args(sys.argv)
 
     sc = init_nncontext("Torch ResNet Prediction Example")

diff --git a/python/orca/example/torchmodel/train/Lenet_mnist.py b/python/orca/example/torchmodel/train/Lenet_mnist.py
@@ -49,8 +49,7 @@ def forward(self, x):
 
 
 if __name__ == '__main__':
-    sparkConf = init_spark_conf().setAppName("test_pytorch_lenet").setMaster("local[1]")\
-        .set('spark.driver.memory', '10g')
+    sparkConf = init_spark_conf().setAppName("test_pytorch_lenet")
     sc = init_nncontext(sparkConf)
     spark = SparkSession.builder.config(conf=sparkConf).getOrCreate()
 
@@ -70,9 +69,8 @@ def lossFunc(input, target):
         return nn.CrossEntropyLoss().forward(input, target.flatten().long())
 
     torch_model = LeNet()
-    model = TorchNet.from_pytorch(module=torch_model, input_shape=[1, 1, 28, 28])
-    criterion = TorchCriterion.from_pytorch(loss=lossFunc, input_shape=[1, 10],
-                                            sample_label=torch.LongTensor([5]))
+    model = TorchNet.from_pytorch(torch_model, [1, 1, 28, 28])
+    criterion = TorchCriterion.from_pytorch(lossFunc, [1, 10], torch.LongTensor([5]))
     classifier = NNClassifier(model, criterion, SeqToTensor([1, 28, 28])) \
         .setBatchSize(64) \
         .setOptimMethod(Adam()) \

diff --git a/python/orca/example/torchmodel/train/SimpleTrainingExample.py b/python/orca/example/torchmodel/train/SimpleTrainingExample.py
@@ -13,66 +13,55 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import torch
 import torch.nn as nn
-import torch.nn.functional as F
-from torch.nn.modules.loss import BCELoss
-from bigdl.nn.criterion import *
-from bigdl.nn.layer import *
 from bigdl.optim.optimizer import Adam
-from pyspark.sql.types import *
-from zoo import init_nncontext
 from zoo.common.nncontext import *
-from zoo.pipeline.api.net.torch_net import TorchNet, TorchIdentityCriterion
+from zoo.pipeline.api.net.torch_net import TorchNet
+from zoo.pipeline.api.net.torch_criterion import TorchCriterion
 from zoo.pipeline.nnframes import *
 
-
-# create training data as Spark DataFrame
-def get_df(sqlContext):
-    data = sc.parallelize([
-        ((2.0, 1.0), 1.0),
-        ((1.0, 2.0), 0.0),
-        ((2.0, 1.0), 1.0),
-        ((1.0, 2.0), 0.0)])
-
-    schema = StructType([
-        StructField("features", ArrayType(DoubleType(), False), False),
-        StructField("label", DoubleType(), False)])
-    df = sqlContext.createDataFrame(data, schema)
-    return df
+from pyspark.ml.linalg import Vectors
+from pyspark.sql import SparkSession
 
 
 # define model with Pytorch
 class SimpleTorchModel(nn.Module):
     def __init__(self):
         super(SimpleTorchModel, self).__init__()
         self.dense1 = nn.Linear(2, 4)
-        self.dense2 = nn.Linear(4, 8)
-        self.dense3 = nn.Linear(8, 1)
+        self.dense2 = nn.Linear(4, 1)
 
     def forward(self, x):
         x = self.dense1(x)
-        x = self.dense2(x)
-        x = F.sigmoid(self.dense3(x))
+        x = torch.sigmoid(self.dense2(x))
         return x
 
 if __name__ == '__main__':
     sparkConf = init_spark_conf().setAppName("testNNClassifer").setMaster('local[1]')
     sc = init_nncontext(sparkConf)
-    sqlContext = SQLContext(sc)
-    df = get_df(sqlContext)
+    spark = SparkSession \
+        .builder \
+        .getOrCreate()
+
+    df = spark.createDataFrame(
+        [(Vectors.dense([2.0, 1.0]), 1.0),
+         (Vectors.dense([1.0, 2.0]), 0.0),
+         (Vectors.dense([2.0, 1.0]), 1.0),
+         (Vectors.dense([1.0, 2.0]), 0.0)],
+        ["features", "label"])
 
     torch_model = SimpleTorchModel()
-    becloss = BCELoss()
+    torch_criterion = nn.MSELoss()
+
+    az_model = TorchNet.from_pytorch(torch_model, [1, 2])
+    az_criterion = TorchCriterion.from_pytorch(torch_criterion, [1, 1], [1, 1])
 
-    model = TorchNet.from_pytorch(module=torch_model,
-                                  input_shape=[1, 2],
-                                  lossFunc=becloss.forward,
-                                  pred_shape=[1, 1], label_shape=[1, 1])
-    classifier = NNEstimator(model, TorchIdentityCriterion(), SeqToTensor([2])) \
-        .setBatchSize(2) \
+    classifier = NNClassifier(az_model, az_criterion) \
+        .setBatchSize(4) \
         .setOptimMethod(Adam()) \
-        .setLearningRate(0.1) \
-        .setMaxEpoch(20)
+        .setLearningRate(0.01) \
+        .setMaxEpoch(10)
 
     nnClassifierModel = classifier.fit(df)
 

diff --git a/python/orca/example/torchmodel/train/resnet_finetune/resnet_finetune.py b/python/orca/example/torchmodel/train/resnet_finetune/resnet_finetune.py
@@ -60,8 +60,7 @@ def forward(self, x):
     def lossFunc(input, target):
         return nn.CrossEntropyLoss().forward(input, target.flatten().long())
 
-    torchcriterion = TorchCriterion.from_pytorch(loss=lossFunc, input_shape=[1, 2],
-                                                 sample_label=torch.LongTensor([1]))
+    torchcriterion = TorchCriterion.from_pytorch(lossFunc, [1, 2], torch.LongTensor([1]))
 
     # prepare training data as Spark DataFrame
     image_path = sys.argv[1]
@@ -75,8 +74,7 @@ def lossFunc(input, target):
     # run training and evaluation
     featureTransformer = ChainedPreprocessing(
         [RowToImageFeature(), ImageCenterCrop(224, 224),
-         ImageChannelNormalize(0, 0, 0, 255.0, 255.0, 255.0),
-         ImageChannelNormalize(0.485, 0.456, 0.406, 0.229, 0.224, 0.225),
+         ImageChannelNormalize(123.0, 117.0, 104.0, 255.0, 255.0, 255.0),
          ImageMatToTensor(), ImageFeatureToTensor()])
 
     classifier = NNClassifier(torchnet, torchcriterion, featureTransformer) \