From 1eda222a58631eb2c3584df4de5f77e59954043b Mon Sep 17 00:00:00 2001 From: ArmageddonKnight Date: Mon, 13 Jul 2020 23:08:24 -0400 Subject: [PATCH] Address Leo's comments on PR#18704 Fix the naming error Remove the deletion of test cases Fix the expected name Change the naming --- python/mxnet/gluon/parameter.py | 2 +- src/imperative/imperative.cc | 12 ++++++++++-- src/profiler/storage_profiler.cc | 5 +++++ tests/python/gpu/test_profiler_gpu.py | 24 +++++++++--------------- 4 files changed, 25 insertions(+), 18 deletions(-) diff --git a/python/mxnet/gluon/parameter.py b/python/mxnet/gluon/parameter.py index 68e860beacdb..6f66947d2c10 100644 --- a/python/mxnet/gluon/parameter.py +++ b/python/mxnet/gluon/parameter.py @@ -644,8 +644,8 @@ def var(self): """Returns a symbol representing this parameter.""" if self._var is None: if self._var_name is None: # _var_name is set manually in SymbolBlock.import + # The variable name is required by the storage profiler. self._var_name = self._uuid.replace('-', '_') + '_' + self._name - self._var = symbol.var(self._var_name, shape=self.shape, dtype=self.dtype, lr_mult=self.lr_mult, wd_mult=self.wd_mult, init=self.init, stype=self._stype) diff --git a/src/imperative/imperative.cc b/src/imperative/imperative.cc index e2028b0b2984..073cfa4f14f6 100644 --- a/src/imperative/imperative.cc +++ b/src/imperative/imperative.cc @@ -233,7 +233,11 @@ void Imperative::RecordOp( nnvm::ObjectPtr node = nnvm::Node::Create(); node->attrs = std::move(attrs); - node_count_ += 1; + if (node->attrs.name == "") { + node->attrs.name = "node_" + std::to_string(node_count_++); + } else { + node_count_++; + } AGInfo& info = AGInfo::Create(node); info.state = state; info.ctx = outputs[0]->ctx(); @@ -322,7 +326,11 @@ void Imperative::RecordDeferredCompute(nnvm::NodeAttrs &&attrs, } node->attrs = std::move(attrs); // Need to support NameManager in imperative API to better name node->attrs.name - node_count_ += 1; + if (node->attrs.name == "") { + node->attrs.name = "node_" + std::to_string(node_count_++); + } else { + node_count_++; + } for (uint32_t i = 0; i < outputs.size(); ++i) { outputs[i]->deferredcompute_entry_ = nnvm::NodeEntry{node, i, 0}; diff --git a/src/profiler/storage_profiler.cc b/src/profiler/storage_profiler.cc index b0025a98d1f6..4969813bc3df 100644 --- a/src/profiler/storage_profiler.cc +++ b/src/profiler/storage_profiler.cc @@ -70,6 +70,11 @@ void GpuDeviceStorageProfiler::DumpProfile() const { gpu_mem_alloc_entries_) { std::string alloc_entry_name = std::regex_replace(alloc_entry.second.name, gluon_param_regex, "$6"); + if (alloc_entry_name == "") { + // If the entry name becomes none after the regex replacement, we revert + // back to the original. + alloc_entry_name = alloc_entry.second.name; + } gpu_mem_ordered_alloc_entries.emplace( alloc_entry.second.profiler_scope + alloc_entry_name, AllocEntryDumpFmt{ diff --git a/tests/python/gpu/test_profiler_gpu.py b/tests/python/gpu/test_profiler_gpu.py index 05bec4a692dd..82510ddcc8bb 100644 --- a/tests/python/gpu/test_profiler_gpu.py +++ b/tests/python/gpu/test_profiler_gpu.py @@ -23,12 +23,13 @@ import mxnet as mx mx.test_utils.set_default_context(mx.gpu(0)) +from mxnet.gluon.block import _block_scope + curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) sys.path.insert(0, os.path.join(curr_path, '../unittest')) -from mxnet import profiler -from mxnet.gluon import nn -from mxnet.gluon.block import _block_scope -from test_profiler import enable_profiler +# We import all tests from ../unittest/test_profiler.py +# They will be detected by test framework, as long as the current file has a different filename +from test_profiler import * def test_gpu_memory_profiler_symbolic(): @@ -38,7 +39,7 @@ def test_gpu_memory_profiler_symbolic(): with profiler.scope("tensordot"): A = mx.sym.Variable('A') B = mx.sym.Variable('B') - C = mx.symbol.dot(A, B, name='dot') + C = mx.symbol.dot(A, B, name="dot") executor = C._simple_bind(mx.gpu(), 'write', A=(1024, 2048), B=(2048, 4096)) @@ -70,21 +71,20 @@ def test_gpu_memory_profiler_symbolic(): # Sample gpu_memory_profile.csv: # "Attribute Name","Requested Size","Device","Actual Size","Reuse?" + # :_head_grad_0,16777216,0,16777216,0 # init:_random_uniform,33554432,0,33554432,1 # init:_random_uniform,8388608,0,8388608,1 # resource:temp_space (sample_op.h +365),8,0,4096,0 # symbol:arg_grad:unknown,8388608,0,8388608,0 # symbol:arg_grad:unknown,33554432,0,33554432,0 # tensordot:dot,16777216,0,16777216,0 - # tensordot:dot_backward,33554432,0,33554432,0 - # tensordot:dot_backward,8388608,0,8388608,0 - # tensordot:dot_head_grad,16777216,0,16777216,0 # tensordot:in_arg:A,8388608,0,8388608,0 # tensordot:in_arg:B,33554432,0,33554432,0 + # tensordot:node_0_backward,33554432,0,33554432,0 + # tensordot:node_0_backward,8388608,0,8388608,0s with open('gpu_memory_profile-pid_%d.csv' % (os.getpid()), mode='r') as csv_file: csv_reader = csv.DictReader(csv_file) - # TODO: Remove this print statement later on. for row in csv_reader: print(",".join(list(row.values()))) for expected_alloc_entry in expected_alloc_entries: @@ -161,7 +161,6 @@ def test_gpu_memory_profiler_gluon(): # there is no unknown entries in the memory profile. with open('gpu_memory_profile-pid_%d.csv' % (os.getpid()), mode='r') as csv_file: csv_reader = csv.DictReader(csv_file) - # TODO: Remove this print statement later on. for row in csv_reader: print(",".join(list(row.values()))) for param in model.collect_params().values(): @@ -185,8 +184,3 @@ def test_gpu_memory_profiler_gluon(): if row['Attribute Name'] == ":unknown" or \ row['Attribute Name'] == ":": assert False, "Unknown allocation entry has been encountered" - - -if __name__ == "__main__": - import nose - nose.runmodule()