Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
Merge remote-tracking branch 'origin/master' into mozga-intel/full_pa…
Browse files Browse the repository at this point in the history
…th_include
  • Loading branch information
pzelazko committed Jan 13, 2022
2 parents 57fee8f + 9fa75b4 commit 9d56915
Show file tree
Hide file tree
Showing 27 changed files with 408 additions and 1,150 deletions.
1,077 changes: 0 additions & 1,077 deletions KEYS

This file was deleted.

1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ Contents

What's New
----------
* [1.9.0 Release](https://github.com/apache/incubator-mxnet/releases/tag/1.9.0) - MXNet 1.9.0 Release.
* [1.8.0 Release](https://github.com/apache/incubator-mxnet/releases/tag/1.8.0) - MXNet 1.8.0 Release.
* [1.7.0 Release](https://github.com/apache/incubator-mxnet/releases/tag/1.7.0) - MXNet 1.7.0 Release.
* [1.6.0 Release](https://github.com/apache/incubator-mxnet/releases/tag/1.6.0) - MXNet 1.6.0 Release.
Expand Down
164 changes: 164 additions & 0 deletions benchmark/python/dnnl/fc_add.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import time
import gc
import sys
import mxnet as mx
from mxnet.gluon import nn
from mxnet.contrib import quantization

# Benchmark cases: (input shape, num_hidden) pairs fed to the FC layers.
sizes = [
    ((1, 224), 512),
    ((1, 224), 4096),
    ((16, 1024), 1024),
    ((32, 4096), 1024),
    ((32, 4096), 4096),
    ((512, 512), 4096),
]

# Number of timed iterations and untimed warm-up iterations per case.
rounds = 1000
warmup = 10

# Presence/absence command-line switches controlling report layout.
dump_graph = "--dump_graph" in sys.argv
test_header = "--no_test_header" not in sys.argv
table_header = "--no_table_header" not in sys.argv
table_left_colums = "--no_size_column" not in sys.argv

def dump_graph_fn(net, postfix):
    """Export *net* to /tmp/fc_add_<postfix> when --dump_graph was given."""
    if not dump_graph:
        return
    net.export("/tmp/fc_add_" + postfix)

def operator_string(elemwise_add):
    """Return the add operator's name: the legacy NDArray 'elemwise_add'
    when *elemwise_add* is true, otherwise the numpy-style 'npi_add'."""
    if elemwise_add:
        return 'elemwise_add'
    return 'npi_add'

def print_header(header):
    """Print the benchmark section title (unless suppressed) followed by a
    markdown table header; size columns are optional (--no_size_column)."""
    print("\n")
    print(header if test_header else "", "\n")
    if not table_header:
        return
    if table_left_colums:
        header_rows = ("| Shape | Hidden | Mean [ms] |",
                       "|------------:|-------:|----------:|")
    else:
        header_rows = (" Mean [ms] |",
                       "----------:|")
    for row in header_rows:
        print(row)

def print_value(shape, hidden, mean):
    """Print a single result row of the markdown table; the shape/hidden
    columns are omitted when --no_size_column was given."""
    if table_left_colums:
        row = "| ({:4},{:4}) | {:6} | {:9.3f} |".format(
            shape[0], shape[1], hidden, mean)
    else:
        row = " {:9.3f} |".format(mean)
    print(row)


def measure(net, data0, data1, data2, shape, nhid):
    """Time ``rounds`` forward passes of *net* (after ``warmup`` untimed
    iterations) and print the mean latency in milliseconds via print_value.

    GC is disabled for the measured region so collection pauses do not skew
    the timings.  The enable is in a ``finally`` so an exception raised by
    the net cannot leave GC disabled for the rest of the process (the
    original code skipped gc.enable() on any error).
    """
    mx.nd.waitall()   # drain pending async work so it is not timed below
    gc.collect()      # start from a clean heap
    gc.disable()      # keep GC pauses out of the measured region
    try:
        for i in range(rounds + warmup):
            if i == warmup:
                start_time = time.time()
            o = net(data0, data1, data2)
            o.wait_to_read()  # block until the output is actually computed
        end_time = time.time()
        run_time = (end_time - start_time)
        print_value(shape, nhid, 1000 * run_time / rounds)
    finally:
        gc.enable()


class FCWithSum(nn.HybridBlock):
    """Two fully-connected layers whose outputs are summed with a third
    input, using either the legacy NDArray ``elemwise_add`` operator or
    numpy-style ``+`` (lowered to ``npi_add``)."""

    def __init__(self, num_in, num_hidden, elemwise_add, **kwargs):
        super(FCWithSum, self).__init__(**kwargs)
        # fc0 gets an explicit input size; fc1 infers it on first call.
        self.fc0 = nn.Dense(units=num_hidden, in_units=num_in)
        self.fc1 = nn.Dense(units=num_hidden)
        self.elemwise_add = elemwise_add

    def forward(self, data0, data1, data2):
        out0 = self.fc0(data0)
        out1 = self.fc1(data1)
        if not self.elemwise_add:
            # numpy-style adds: data2 + fc0 first, then fc1 on top.
            return out1 + (data2 + out0)
        # Route through the NDArray elemwise_add op, converting at the edges.
        partial = mx.nd.elemwise_add(
            data2.as_nd_ndarray(), out0.as_nd_ndarray()).as_np_ndarray()
        return mx.nd.elemwise_add(
            out1.as_nd_ndarray(), partial.as_nd_ndarray()).as_np_ndarray()

def benchmark_float(elemwise_add):
    """Benchmark the float32 model for every size, fused via the ONEDNN
    backend, and print one result row per (shape, num_hidden) case."""
    print_header(operator_string(elemwise_add) + ', float')
    for shape, nhid in sizes:
        model = FCWithSum(shape[1], nhid, elemwise_add)
        model.initialize()
        model.hybridize(static_alloc=True, static_shape=True)
        data0 = mx.np.random.uniform(size=shape, low=-1.0, high=1.0)
        data1 = mx.np.random.uniform(size=shape, low=-1.0, high=1.0)
        data2 = mx.np.random.uniform(size=(shape[0], nhid), low=-1.0, high=1.0)
        # Apply the oneDNN graph fusion before measuring.
        model.optimize_for(data0, data1, data2, backend='ONEDNN')
        measure(model, data0, data1, data2, shape, nhid)
        dump_graph_fn(model, operator_string(elemwise_add) + '_float')

class CalibIter(mx.io.DataIter):
    """Minimal DataIter that serves one pre-built batch, for calibration."""

    def __init__(self, batch, data_shape, batch_size):
        super(CalibIter, self).__init__(batch_size)
        self.batch = batch
        self.data_shape = data_shape
        self.label_shape = (batch_size,)
        # A bare tuple means a single input named 'data'; anything else is
        # taken as a fully-formed provide_data description.
        self.provide_data = ([('data', data_shape)]
                             if isinstance(data_shape, tuple) else data_shape)
        self.provide_label = []

    def __iter__(self):
        yield self.batch

def benchmark_int8(quantize_mode, quantize_granularity, elemwise_add):
    """Quantize the model to int8 with the given mode and granularity using
    naive calibration on one random batch, then benchmark every size."""
    print_header('%s, mode = %s, granularity = %s'
                 % (operator_string(elemwise_add),
                    quantize_mode, quantize_granularity))
    for shape, nhid in sizes:
        net = FCWithSum(shape[1], nhid, elemwise_add)
        net.initialize()
        net.hybridize(static_alloc=True, static_shape=True)
        data0 = mx.np.random.uniform(size=shape, low=-1.0, high=1.0)
        data1 = mx.np.random.uniform(size=shape, low=-1.0, high=1.0)
        data2 = mx.np.random.uniform(size=(shape[0], nhid), low=-1.0, high=1.0)
        dataset = mx.gluon.data.ArrayDataset(data0, data1, data2)
        calib_data = mx.gluon.data.DataLoader(dataset, batch_size=1)
        # Replace the float net with its int8-quantized counterpart.
        net = quantization.quantize_net(
            net,
            device=mx.cpu(),
            exclude_layers=None,
            exclude_operators=None,
            calib_mode='naive',
            calib_data=calib_data,
            num_calib_batches=1,
            quantize_mode=quantize_mode,
            quantize_granularity=quantize_granularity)
        net.hybridize(static_alloc=True, static_shape=True)
        measure(net, data0, data1, data2, shape, nhid)
        dump_graph_fn(net, '_'.join((operator_string(elemwise_add),
                                     str(quantize_mode),
                                     str(quantize_granularity))))

# Float benchmarks first, then every int8 mode/granularity combination.
for elemwise_add in (True, False):
    benchmark_float(elemwise_add)

for quantize_mode in ('smart', 'full'):
    for quantize_granularity in ('tensor-wise', 'channel-wise'):
        for elemwise_add in (True, False):
            benchmark_int8(quantize_mode, quantize_granularity, elemwise_add)
54 changes: 54 additions & 0 deletions benchmark/python/dnnl/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/bin/bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Script for running a python benchmark with OMP parameters properly set for it.
#
# Usage: run.sh [num_threads] python_script [python script parameters]

# Exit with a usage hint when no python script argument remains.
check_parameters() {
    if [ "$#" -eq 0 ] ; then
        echo "Please give python script to run as parameter."
        echo "Optionally you can give number of threads to use and python scripts parameters:"
        echo "  $(basename "$0") [num_threads] python_script [python script parameters]"
        exit
    fi
}

check_parameters "$@"

# Physical core count = sockets x cores per socket (hyperthreads ignored).
NUM_SOCKET=$(lscpu | grep 'Socket(s)' | awk '{print $NF}')
CORES_PER_SOCKET=$(lscpu | grep 'Core(s) per socket' | awk '{print $NF}')
NUM_CORES=$((CORES_PER_SOCKET * NUM_SOCKET))

# An optional leading integer argument overrides the thread count.
integer_reg='^[0-9]+$'
if [[ $1 =~ $integer_reg ]] ; then
    if (($1 > $NUM_CORES)); then
        echo >&2
        echo "WARNING: given number of threads = $1" \
             " is greater than number of physical cores = $NUM_CORES." >&2
        echo >&2
    fi
    NUM_CORES=$1
    shift
    check_parameters "$@"
fi

# OMP_PLACES interval notation: {0}:N:1 = N places starting at core 0, stride 1.
CORES={0}:${NUM_CORES}:1

INSTRUCTION="OMP_NUM_THREADS=${NUM_CORES} OMP_PROC_BIND=TRUE OMP_PLACES=${CORES} python3 -u $@"
echo $INSTRUCTION >&2
eval $INSTRUCTION
82 changes: 82 additions & 0 deletions benchmark/python/dnnl/run_per_thread.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/bin/bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Script for running a python benchmark against a range of OMP thread counts.
#
# Usage: run_per_thread.sh [start step end] python_script [python script parameters]


# Print the usage message and stop.
help_and_exit() {
    echo "Usage:"
    echo "  $(basename "$0") [start_num_threads step_num_threads end_num_threads] python_script [python script parameters]"
    echo "Number of threads range parameters and python script are optional."
    exit
}

if [ "$#" -eq 0 ] ; then
    help_and_exit
fi

# Physical core count = sockets x cores per socket.
NUM_SOCKET=$(lscpu | grep 'Socket(s)' | awk '{print $NF}')
CORES_PER_SOCKET=$(lscpu | grep 'Core(s) per socket' | awk '{print $NF}')
NUM_CORES=$((CORES_PER_SOCKET * NUM_SOCKET))

# Default thread range 1..NUM_CORES step 1, optionally overridden by three
# leading numeric arguments (start, step, end).
NT_START=1
NT_STEP=1
NT_END=$NUM_CORES

integer_reg='^[0-9]+$'
signed_integer_reg='^[+-]*[0-9]+$'
if [[ $1 =~ $integer_reg ]] ; then
    if [[ $2 =~ $signed_integer_reg ]] && [[ $3 =~ $integer_reg ]]; then
        NT_START=$1
        NT_STEP=$2
        NT_END=$3
        shift 3
        if [ "$#" -eq 0 ] ; then
            help_and_exit
        fi
    else
        echo "Provide 3 numbers for threads range: start, step and the end."
        help_and_exit
    fi
fi

NT_SEQUENCE=$(seq $NT_START $NT_STEP $NT_END)
if [ -z "$NT_SEQUENCE" ]; then
    echo "Given threads range produces an empty sequence."
    help_and_exit
else
    echo "Start python script $1 for following number of threads:" >&2
    echo $NT_SEQUENCE >&2
fi

# Run the benchmark once per thread count.  Each run writes to its own temp
# file; the per-thread columns are then pasted together side by side.  Only
# the first run keeps the size columns and test headers so the merged table
# stays readable.
RUN_SCRIPT=$(dirname "$0")/run.sh
for NT in $NT_SEQUENCE;
do
    TMP_FILE=/tmp/_result_${NT}.txt
    echo 1>${TMP_FILE}
    if [[ $NT -eq $NT_START ]]; then
        echo "NUM_THREADS = $NT" 1>>${TMP_FILE}
        $RUN_SCRIPT $NT "$@" 1>>${TMP_FILE}
    else
        echo "  $NT" 1>>${TMP_FILE}
        $RUN_SCRIPT $NT "$@" --no_size_column --no_test_header 1>>${TMP_FILE}
    fi
    TMP_FILES+=" ${TMP_FILE}"
done
paste -d "" ${TMP_FILES}
2 changes: 1 addition & 1 deletion ci/docker/runtime_functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1329,7 +1329,7 @@ build_docs() {

# copy the full site for this version to versions folder
mkdir -p html/versions/master
for f in 404.html api assets blog community ecosystem features feed.xml get_started index.html; do
for f in 404.html api assets blog community ecosystem features trusted_by feed.xml get_started index.html; do
cp -r html/$f html/versions/master/
done

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ After defining the model, we need to define a few more things: our loss, our tra

Loss function is used to calculate how the output of the network differs from the ground truth. Because classes of the logistic regression are either 0 or 1, we are using [SigmoidBinaryCrossEntropyLoss](../../api/gluon/loss/index.rst#mxnet.gluon.loss.SigmoidBinaryCrossEntropyLoss). Notice that we do not specify `from_sigmoid` attribute in the code, which means that the output of the neuron doesn't need to go through sigmoid, but at inference we'd have to pass it through sigmoid. You can learn more about cross entropy on [wikipedia](https://en.wikipedia.org/wiki/Cross_entropy).

Trainer object allows to specify the method of training to be used. For our tutorial we use [Stochastic Gradient Descent (SGD)](../../api/optimizer/index.rst#mxnet.optimizer.SGD). For more information on SGD refer to [the following tutorial](https://gluon.mxnet.io/chapter06_optimization/gd-sgd-scratch.html). We also need to parametrize it with learning rate value, which defines the weight updates, and weight decay, which is used for regularization.
Trainer object allows to specify the method of training to be used. For our tutorial we use [Stochastic Gradient Descent (SGD)](../../api/optimizer/index.rst#mxnet.optimizer.SGD). For more information on SGD refer to [the following tutorial](https://d2l.ai/chapter_optimization/sgd.html). We also need to parametrize it with learning rate value, which defines the weight updates, and weight decay, which is used for regularization.

Metric helps us to estimate how good our model is in terms of a problem we are trying to solve. Where loss function has more importance for the training process, a metric is usually the thing we are trying to improve and reach maximum value. We also can use more than one metric, to measure various aspects of our model. In our example, we are using [Accuracy](../../api/gluon/metric/index.rst#mxnet.gluon.metric.Accuracy) and [F1 score](../../api/gluon/metric/index.rst#mxnet.gluon.metric.F1) as measurements of success of our model.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
# Image similarity search with InfoGAN

This notebook shows how to implement an InfoGAN based on Gluon. InfoGAN is an extension of GANs, where the generator input is split in 2 parts: random noise and a latent code (see [InfoGAN Paper](https://arxiv.org/pdf/1606.03657.pdf)).
The codes are made meaningful by maximizing the mutual information between code and generator output. InfoGAN learns a disentangled representation in a completely unsupervised manner. It can be used for many applications such as image similarity search. This notebook uses the DCGAN example from the [Straight Dope Book](https://gluon.mxnet.io/chapter14_generative-adversarial-networks/dcgan.html) and extends it to create an InfoGAN.
The codes are made meaningful by maximizing the mutual information between code and generator output. InfoGAN learns a disentangled representation in a completely unsupervised manner. It can be used for many applications such as image similarity search. This notebook uses the DCGAN example and extends it to create an InfoGAN.


```{.python .input}
Expand Down Expand Up @@ -112,7 +112,7 @@ train_dataloader = gluon.data.DataLoader(train_data, batch_size=batch_size, shuf
```

## Generator
Define the Generator model. Architecture is taken from the DCGAN implementation in [Straight Dope Book](https://gluon.mxnet.io/chapter14_generative-adversarial-networks/dcgan.html). The Generator consist of 4 layers where each layer involves a strided convolution, batch normalization, and rectified nonlinearity. It takes as input random noise and the latent code and produces an `(64,64,3)` output image.
Define the Generator model. The Generator consist of 4 layers where each layer involves a strided convolution, batch normalization, and rectified nonlinearity. It takes as input random noise and the latent code and produces an `(64,64,3)` output image.


```{.python .input}
Expand Down
1 change: 1 addition & 0 deletions docs/python_docs/themes/mx-theme/mxtheme/header_top.html
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
<a class="page-link" href="{{theme_relative_url}}features">Features</a>
<a class="page-link" href="{{theme_relative_url}}ecosystem">Ecosystem</a>
<a class="page-link page-current" href="{{theme_relative_url}}api">Docs & Tutorials</a>
<a class="page-link" href="{{theme_relative_url}}trusted_by">Trusted By</a>
<a class="page-link" href="https://github.com/apache/incubator-mxnet">GitHub</a>
<div class="dropdown">
<span class="dropdown-header">master
Expand Down
1 change: 1 addition & 0 deletions docs/static_site/src/_includes/header.html
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
<a class="page-link" href="{{'/features' | relative_url }}">Features</a>
<a class="page-link" href="{{'/ecosystem' | relative_url }}">Ecosystem</a>
<a class="page-link" href="{{'/api' | relative_url }}">Docs & Tutorials</a>
<a class="page-link" href="{{'/trusted_by' | relative_url }}">Trusted By</a>
<a class="page-link" href="https://github.com/apache/incubator-mxnet">GitHub</a>
<div class="dropdown">
<span class="dropdown-header">master
Expand Down
8 changes: 0 additions & 8 deletions docs/static_site/src/_layouts/home.html
Original file line number Diff line number Diff line change
Expand Up @@ -178,14 +178,6 @@ <h3>{{feature.title}}</h3>
</div>
</div>
</div>
<div class="trusted-by-section section ">
<div class="wrapper">
<div class="trusted-by">
<h2>Already trusted by</h2>
<img src="{{'/assets/img/logos.png' | relative_url}}">
</div>
</div>
</div>
</main>

{%- include footer.html -%}
Expand Down
14 changes: 0 additions & 14 deletions docs/static_site/src/_sass/minima/_home.scss
Original file line number Diff line number Diff line change
Expand Up @@ -170,20 +170,6 @@ a.btn {
background-color: white;
}

// ===
// trusted-by
// ===

.trusted-by-section {
background-color: white;
color: $grey-color-dark;
}

.trusted-by {
margin-top: 30px;

}

// ===
// news
// ===
Expand Down
Binary file added docs/static_site/src/assets/img/kubeflow.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 9d56915

Please sign in to comment.