From 525ead9caaf49035b0310ef7c8b686b393463760 Mon Sep 17 00:00:00 2001 From: Alexander Zai Date: Tue, 14 Aug 2018 13:22:44 -0400 Subject: [PATCH] [Flaky Test] Fix test_gluon_model_zoo.test_models when MXNET_MKLDNN_DEBUG=1 (#12069) * reorder inputs * use function flatten vs build in method * update similar array atoi to 0.01 * fix reorder * enable MXNET_MKLDNN_DEBUG in CI * add exclude debug flag * fix lint * add warning log for excluded op * retrigger --- ci/docker/runtime_functions.sh | 28 +++++++-------------------- python/mxnet/gluon/nn/basic_layers.py | 2 +- src/operator/nn/lrn.cc | 2 ++ src/operator/nn/mkldnn/mkldnn_base.cc | 15 ++++++++++---- 4 files changed, 21 insertions(+), 26 deletions(-) diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index bbcffe68bb0f..24f844fdfc5a 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -581,9 +581,7 @@ sanity_check() { unittest_ubuntu_python2_cpu() { set -ex export PYTHONPATH=./python/ - # MXNET_MKLDNN_DEBUG is buggy and produces false positives - # https://github.com/apache/incubator-mxnet/issues/10026 - #export MXNET_MKLDNN_DEBUG=1 # Ignored if not present + export MXNET_MKLDNN_DEBUG=1 export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 nosetests-2.7 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_unittest.xml --verbose tests/python/unittest nosetests-2.7 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_train.xml --verbose tests/python/train @@ -593,9 +591,7 @@ unittest_ubuntu_python2_cpu() { unittest_ubuntu_python3_cpu() { set -ex export PYTHONPATH=./python/ - # MXNET_MKLDNN_DEBUG is buggy and produces false positives - # https://github.com/apache/incubator-mxnet/issues/10026 - #export MXNET_MKLDNN_DEBUG=1 # Ignored if not present + export MXNET_MKLDNN_DEBUG=1 # Ignored if not present export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_unittest.xml --verbose tests/python/unittest nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_quantization.xml --verbose tests/python/quantization @@ -604,9 +600,7 @@ unittest_ubuntu_python3_cpu() { unittest_ubuntu_python3_cpu_mkldnn() { set -ex export PYTHONPATH=./python/ - # MXNET_MKLDNN_DEBUG is buggy and produces false positives - # https://github.com/apache/incubator-mxnet/issues/10026 - #export MXNET_MKLDNN_DEBUG=1 # Ignored if not present + export MXNET_MKLDNN_DEBUG=1 # Ignored if not present export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_unittest.xml --verbose tests/python/unittest nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_mkl.xml --verbose tests/python/mkl @@ -615,9 +609,7 @@ unittest_ubuntu_python3_cpu_mkldnn() { unittest_ubuntu_python2_gpu() { set -ex export PYTHONPATH=./python/ - # MXNET_MKLDNN_DEBUG is buggy and produces false positives - # https://github.com/apache/incubator-mxnet/issues/10026 - #export MXNET_MKLDNN_DEBUG=1 # Ignored if not present + export MXNET_MKLDNN_DEBUG=1 # Ignored if not present export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 nosetests-2.7 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_gpu.xml --verbose tests/python/gpu } @@ -649,9 +641,7 @@ tutorialtest_ubuntu_python2_gpu() { unittest_ubuntu_python3_gpu() { set -ex export PYTHONPATH=./python/ - # MXNET_MKLDNN_DEBUG is buggy and produces false positives - # https://github.com/apache/incubator-mxnet/issues/10026 - #export MXNET_MKLDNN_DEBUG=1 # Ignored if not present + export MXNET_MKLDNN_DEBUG=1 # Ignored if not present export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_gpu.xml --verbose tests/python/gpu } @@ -678,9 +668,7 @@ unittest_ubuntu_tensorrt_gpu() { unittest_ubuntu_python2_quantization_gpu() { set -ex export PYTHONPATH=./python/ - # MXNET_MKLDNN_DEBUG is buggy and produces false positives - # https://github.com/apache/incubator-mxnet/issues/10026 - #export MXNET_MKLDNN_DEBUG=1 # Ignored if not present + export MXNET_MKLDNN_DEBUG=1 # Ignored if not present export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 nosetests-2.7 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_quantization_gpu.xml --verbose tests/python/quantization_gpu } @@ -690,9 +678,7 @@ unittest_ubuntu_python2_quantization_gpu() { unittest_ubuntu_python3_quantization_gpu() { set -ex export PYTHONPATH=./python/ - # MXNET_MKLDNN_DEBUG is buggy and produces false positives - # https://github.com/apache/incubator-mxnet/issues/10026 - #export MXNET_MKLDNN_DEBUG=1 # Ignored if not present + export MXNET_MKLDNN_DEBUG=1 # Ignored if not present export MXNET_STORAGE_FALLBACK_LOG_VERBOSE=0 nosetests-3.4 $NOSE_COVERAGE_ARGUMENTS --with-xunit --xunit-file nosetests_quantization_gpu.xml --verbose tests/python/quantization_gpu } diff --git a/python/mxnet/gluon/nn/basic_layers.py b/python/mxnet/gluon/nn/basic_layers.py index ad69d4e9dd90..d26841977ac2 100644 --- a/python/mxnet/gluon/nn/basic_layers.py +++ b/python/mxnet/gluon/nn/basic_layers.py @@ -427,7 +427,7 @@ def __init__(self, **kwargs): super(Flatten, self).__init__(**kwargs) def hybrid_forward(self, F, x): - return x.reshape((0, -1)) + return F.Flatten(x) def __repr__(self): return self.__class__.__name__ diff --git a/src/operator/nn/lrn.cc b/src/operator/nn/lrn.cc index 6b3d7c818378..30a752340a5b 100644 --- a/src/operator/nn/lrn.cc +++ b/src/operator/nn/lrn.cc @@ -204,6 +204,8 @@ NNVM_REGISTER_OP(_backward_LRN) .set_attr("TIsBackward", true) #if MXNET_USE_MKLDNN == 1 .set_attr("FComputeEx", LRNGradComputeExCPU) +// Native compute requires norm while MKLDNN does not so cannot be compared in debug mode +.set_attr("TExcludeMKLDNNDebug", true) #endif .set_attr("FCompute", LRNGradCompute); diff --git a/src/operator/nn/mkldnn/mkldnn_base.cc b/src/operator/nn/mkldnn/mkldnn_base.cc index 4e4982e96ee5..27c574deae53 100644 --- a/src/operator/nn/mkldnn/mkldnn_base.cc +++ b/src/operator/nn/mkldnn/mkldnn_base.cc @@ -473,9 +473,11 @@ void OpCheck::Init(const std::vector &inputs_, auto ctx = inputs_[0].ctx(); CHECK(!MKLDNNStream::Get()->HasOps()); for (size_t i = 0; i < inputs_.size(); i++) { - inputs.emplace_back(inputs_[i].shape(), ctx, - false, inputs_[i].dtype()); - auto mem = inputs_[i].GetMKLDNNData(); + NDArray data = inputs_[i]; + inputs.emplace_back(data.shape(), ctx, false, data.dtype()); + if (data.IsMKLDNNData() && data.IsView()) + data = data.Reorder2Default(); + auto mem = data.GetMKLDNNData(); inputs[i].CopyFrom(*mem); } for (size_t i = 0; i < outputs_.size(); i++) { @@ -494,6 +496,11 @@ void OpCheck::Run(mxnet::FCompute fn, const nnvm::NodeAttrs &attrs, const std::vector &inputs_, const std::vector &req, const std::vector &outputs_) { + static auto& is_excluded = Op::GetAttr("TExcludeMKLDNNDebug"); + if (is_excluded.get(attrs.op, false)) { + LOG(WARNING) << attrs.op->name << " not checked. TExcludeMKLDNNDebug flag present"; + return; + } std::vector in_blobs(inputs.size()); for (size_t i = 0; i < in_blobs.size(); i++) in_blobs[i] = inputs[i].data(); std::vector out_blobs(outputs.size()); @@ -509,7 +516,7 @@ void OpCheck::Run(mxnet::FCompute fn, const nnvm::NodeAttrs &attrs, if (req[i] == kNullOp) continue; MSHADOW_TYPE_SWITCH(outputs[i].dtype(), DType, { - bool similar = SimilarArray(outputs[i], outputs_[i], 1e-3, 1e-3); + bool similar = SimilarArray(outputs[i], outputs_[i], 1e-2, 1e-2); if (!similar) { LOG(ERROR) << attrs.op->name << " fails"; }