diff --git a/nnvm/python/nnvm/frontend/tensorflow.py b/nnvm/python/nnvm/frontend/tensorflow.py
index fd6c389850d98..3f606c1447aea 100644
--- a/nnvm/python/nnvm/frontend/tensorflow.py
+++ b/nnvm/python/nnvm/frontend/tensorflow.py
@@ -33,6 +33,7 @@ def __call__(self, inputs, attrs, *args):
         self._ignores.append('_input_shapes')
         self._ignores.append('T')
         self._ignores.append('use_cudnn_on_gpu')
+        self._ignores.append('is_training')
         return AttrConvert(self._op_name, self._transforms, self._excludes,
                            self._disables, self._ignores, self._extras,
                            self._custom_check)(inputs, attrs, *args)
@@ -354,13 +355,99 @@ def _impl(inputs, attr, params):
         # (data, gamma, beta, moving_mean, moving_var)
         new_inputs = [inputs[0], inputs[4], inputs[3], inputs[1], inputs[2]]
+
         return AttrCvt(
             op_name='batch_norm',
             transforms={'scale_after_normalization':'scale', 'variance_epsilon':'epsilon'},
             extras={'axis': 3}, # Fix axis
+            ignores=['data_format'],
             disables=['momentum'])(new_inputs, attr)
     return _impl
 
+def _relu6():
+    def _impl(inputs, attr, params):
+        return _sym.clip(inputs[0], a_min=0, a_max=6)
+    return _impl
+
+def _depthwise_conv():
+    def _impl(inputs, attr, params):
+        attr['data_format'] = attr['data_format'].decode("utf-8")
+        input_shapes = attr['_input_shapes'][inputs[0]]
+
+        # Extract kernel shape from params
+        conv_param_weights = params[inputs[1].list_output_names()[0]]
+
+        if attr['data_format'] == 'NHWC':
+            kernel_h, kernel_w, in_channels, depth_mult = conv_param_weights.shape
+            attr['kernel_shape'] = (conv_param_weights.shape[0], conv_param_weights.shape[1])
+            attr['channels'] = input_shapes[0][3] * depth_mult
+            if 'dilations' in attr:
+                attr['dilations'] = (attr['dilations'][0], attr['dilations'][1])
+        elif attr['data_format'] == 'NCHW':
+            depth_mult, in_channels, kernel_h, kernel_w = conv_param_weights.shape
+            attr['kernel_shape'] = (conv_param_weights.shape[2], conv_param_weights.shape[3])
+            attr['channels'] = input_shapes[0][1] * depth_mult
+            if 'dilations' in attr:
+                attr['dilations'] = (attr['dilations'][2], attr['dilations'][3])
+        else:
+            raise TypeError("Unsupported data format type: {}".format(attr['data_format']))
+
+        # Fix strides
+        attr['strides'] = (attr['strides'][1], attr['strides'][2])
+
+        # Fix groups
+        attr['groups'] = attr['channels']
+
+        # Fix padding
+        attr['padding'] = attr['padding'].decode("utf-8")
+
+        if attr['padding'] == 'VALID':
+            attr['padding'] = [0, 0]
+        elif attr['padding'] == 'SAME':
+            stride_h, stride_w = attr['strides']
+            kernel_h, kernel_w = attr['kernel_shape']
+            if attr['data_format'] == 'NHWC':
+                in_h = input_shapes[0][1]
+                in_w = input_shapes[0][2]
+            else:
+                in_h = input_shapes[0][2]
+                in_w = input_shapes[0][3]
+
+            pad_v = _get_pad_pair(in_h, kernel_h, stride_h)
+            pad_h = _get_pad_pair(in_w, kernel_w, stride_w)
+
+            if attr['data_format'] == 'NHWC':
+                inputs[0] = _sym.pad(data=inputs[0],
+                                     pad_width=((0, 0),
+                                                (pad_v[0], pad_v[1]),
+                                                (pad_h[0], pad_h[1]),
+                                                (0, 0)))
+            else:
+                inputs[0] = _sym.pad(data=inputs[0],
+                                     pad_width=((0, 0),
+                                                (0, 0),
+                                                (pad_v[0], pad_v[1]),
+                                                (pad_h[0], pad_h[1])))
+
+            attr['padding'] = [0, 0]
+
+        else:
+            raise TypeError("Unsupported padding type: {}".format(attr['padding']))
+
+        if 'kernel_layout' not in attr:
+            attr['kernel_layout'] = 'HWIO' if attr['data_format'] == 'NHWC' else 'OIHW'
+
+        return AttrCvt(
+            op_name=_dimension_picker('conv'),
+            transforms={
+                'kernel_shape': 'kernel_size',
+                'data_format': 'layout',
+                'dilations': ('dilation', (0, 0)),
+                'group': ('groups', 1)},
+            extras={'use_bias': len(inputs) == 3},
+            custom_check=_dimension_constraint())(inputs, attr)
+    return _impl
+
 # compatible operators that do NOT require any conversion.
 _identity_list = []
@@ -392,6 +479,9 @@ def _impl(inputs, attr, params):
     'Add'                       : _elemwise('add'),
     'Rsqrt'                     : _rsqrt(),
     'Squeeze'                   : _squeeze(),
+    'FusedBatchNorm'            : _batch_norm(),
+    'Relu6'                     : _relu6(),
+    'DepthwiseConv2dNative'     : _depthwise_conv(),
 }
@@ -458,9 +548,13 @@ def from_tensorflow(self, graph):
                 self._num_input += 1
                 self._nodes[node.name] = _sym.Variable(name=node.name)
 
-                self._output_shapes[node.name] = \
-                    [tensor_util.TensorShapeProtoToList(shape) \
-                    for shape in self._parse_attr(node.attr)['_output_shapes']]
+                try:
+                    self._output_shapes[node.name] = \
+                        [tensor_util.TensorShapeProtoToList(shape) \
+                         for shape in self._parse_attr(node.attr)['_output_shapes']]
+                except KeyError:
+                    raise NotImplementedError( \
+                        "Please freeze the graph with add_shapes=True")
             elif node.op == "Const":
                 # Assuming first Const node as Graph Input node
                 if self._input_node == '':
@@ -476,13 +570,42 @@ def from_tensorflow(self, graph):
                     raise NotImplementedError( \
                         "Const {} couldn't be converted to Param.".format(node.name))
 
-                self._output_shapes[node.name] = \
-                    [tensor_util.TensorShapeProtoToList(shape) \
-                    for shape in self._parse_attr(node.attr)['_output_shapes']]
+                try:
+                    self._output_shapes[node.name] = \
+                        [tensor_util.TensorShapeProtoToList(shape) \
+                         for shape in self._parse_attr(node.attr)['_output_shapes']]
+                except KeyError:
+                    raise NotImplementedError( \
+                        "Please freeze the graph with add_shapes=True")
+            elif node.op == "Shape":
+                # As we already have all shapes, the Shape op can become a simple param.
+                self._num_param += 1
+                attr = self._parse_attr(node.attr)
+
+                in_shape = self._output_shapes[node.input[0]]
+                # TODO: Implement Shape operator later
+                in_shape[0] = [1 if x == -1 else x for x in in_shape[0]]
+
+                self._params[node.name] = tvm.nd.array(in_shape[0])
+                self._nodes[node.name] = _sym.Variable(name=node.name,
+                                                       shape=self._params[node.name].shape)
+                try:
+                    self._output_shapes[node.name] = \
+                        [tensor_util.TensorShapeProtoToList(shape) \
+                         for shape in self._parse_attr(node.attr)['_output_shapes']]
+                except KeyError:
+                    raise NotImplementedError( \
+                        "Please freeze the graph with add_shapes=True")
             else:
                 attr = self._parse_attr(node.attr)
-                self._output_shapes[node.name] = \
-                    [tensor_util.TensorShapeProtoToList(shape) for shape in attr['_output_shapes']]
+
+                try:
+                    self._output_shapes[node.name] = \
+                        [tensor_util.TensorShapeProtoToList(shape) \
+                         for shape in attr['_output_shapes']]
+                except KeyError:
+                    raise NotImplementedError( \
+                        "Please freeze the graph with add_shapes=True")
 
                 # Pass the parsed shapes instead
                 attr["_output_shapes"] = self._output_shapes[node.name]
diff --git a/nnvm/python/nnvm/top/nn.py b/nnvm/python/nnvm/top/nn.py
index dfebc368eaf3f..4106ce2d68e1c 100644
--- a/nnvm/python/nnvm/top/nn.py
+++ b/nnvm/python/nnvm/top/nn.py
@@ -98,7 +98,11 @@ def compute_conv2d(attrs, inputs, _):
     if groups == 1:
         out = topi.nn.conv2d(inputs[0], kernel, strides, padding, layout)
     elif groups == get_const_int(inputs[0].shape[1]) and groups == channels:
+        # NCHW
         out = topi.nn.depthwise_conv2d_nchw(inputs[0], kernel, strides, padding)
+    elif groups == get_const_int(inputs[0].shape[3]) and groups == channels:
+        # NHWC
+        out = topi.nn.depthwise_conv2d_nhwc(inputs[0], kernel, strides, padding)
     else:
         raise ValueError("not support arbitrary group number for now")
     if attrs.get_bool("use_bias"):
diff --git a/nnvm/src/top/nn/convolution.cc b/nnvm/src/top/nn/convolution.cc
index 8b66f9757fe39..169f1ed185b59 100644
--- a/nnvm/src/top/nn/convolution.cc
+++ b/nnvm/src/top/nn/convolution.cc
@@ -79,7 +79,7 @@ inline bool Conv2DInferShape(const nnvm::NodeAttrs& attrs,
                    param.kernel_size[1]});
     wshape = ConvertLayout(wshape, kOIHW, kernel_layout);
 
-    wshape[0] *= param.groups;
+    wshape[kernel_layout.indexof('I')] *= param.groups;
 
     NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, Conv2DParam::kWeight, wshape);
     if (param.use_bias) {
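
Note on the new "Please freeze the graph with add_shapes=True" error paths above: the frontend reads the _output_shapes attribute of every node, which TensorFlow only emits when the GraphDef is exported with shape annotations. The sketch below shows one way to prepare such a graph in TF 1.x and hand it to this frontend. It is a minimal sketch, not part of this patch: the module-level nnvm.frontend.from_tensorflow and nnvm.compiler.build entry points are assumed, and the output node name, input name, and input shape are placeholders.

    # Minimal sketch (assumptions noted above); TF 1.x APIs.
    import tensorflow as tf
    import nnvm
    import nnvm.frontend
    import nnvm.compiler

    with tf.Session() as sess:
        # ... build or restore the model into the default graph here ...
        # add_shapes=True attaches the _output_shapes attr this frontend requires.
        graph_def = sess.graph.as_graph_def(add_shapes=True)
        frozen = tf.graph_util.convert_variables_to_constants(
            sess, graph_def, ["MobilenetV1/Predictions/Reshape_1"])  # placeholder output node

    # Without the shape annotations, the importer raises
    # NotImplementedError("Please freeze the graph with add_shapes=True").
    sym, params = nnvm.frontend.from_tensorflow(frozen)
    graph, lib, params = nnvm.compiler.build(
        sym, target="llvm", shape={"input": (1, 224, 224, 3)}, params=params)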