Correctly import Caffe BatchNorm (#6176)
* Correctly import Caffe BatchNorm

* Compensate for the cuDNN epsilon shift by changing the variance

cuDNN requires the BatchNorm variance eps to be larger than 1e-05 (CUDNN_BN_MIN_EPSILON). Before this commit, eps values were clipped to 1.1e-05, which introduced a small numerical discrepancy in evaluation.

This commit avoids the discrepancy by compensating for the shift in the stored variance values (see the numerical sketch below).

* Improved epsilon shift compensation and comments
matteosal authored and mli committed May 15, 2017
1 parent 87927ee commit cde5361
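
Why this works (a minimal numerical sketch, not part of the commit): MXNet's BatchNorm divides by sqrt(var + eps), so if the converted symbol is forced to use a larger eps than the Caffe model was trained with, adding the difference eps_caffe - eps_symbol to the stored variance leaves the denominator unchanged. The variable names mirror the diff below; the example values are made up.

    # Illustration only: eps_caffe comes from the prototxt, eps_symbol is the
    # value convert_symbol writes into the MXNet symbol, var is one (already
    # rescaled) variance entry.
    eps_caffe = 1e-05
    eps_symbol = 1e-04
    var = 0.25

    eps_correction = eps_caffe - eps_symbol
    var_stored = var + eps_correction   # what ends up in the _moving_var blob

    # The normalization denominators match exactly:
    assert abs((var + eps_caffe) - (var_stored + eps_symbol)) < 1e-12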
Showing 2 changed files with 22 additions and 7 deletions.
tools/caffe_converter/convert_model.py: 15 additions & 6 deletions
@@ -43,6 +43,7 @@ def convert_model(prototxt_fname, caffemodel_fname, output_prefix=None):
 
     layers, names = caffe_parser.read_caffemodel(prototxt_fname, caffemodel_fname)
     layer_iter = caffe_parser.layer_iter(layers, names)
+    layers_proto = caffe_parser.get_layers(caffe_parser.read_prototxt(prototxt_fname))
 
     for layer_name, layer_type, layer_blobs in layer_iter:
         if layer_type == 'Convolution' or layer_type == 'InnerProduct' or layer_type == 4 or layer_type == 14 \
@@ -120,18 +121,26 @@ def convert_model(prototxt_fname, caffemodel_fname, output_prefix=None):
             bn_name = layer_name
             mean = layer_blobs[0].data
             var = layer_blobs[1].data
-            moving_average_factor = layer_blobs[2].data
+            rescale_factor = layer_blobs[2].data
+            if rescale_factor != 0:
+                rescale_factor = 1 / rescale_factor
             mean_name = '{}_moving_mean'.format(bn_name)
             var_name = '{}_moving_var'.format(bn_name)
-            maf_name = '{}_momentum'.format(bn_name)
             mean = mean.reshape(aux_shape_dic[mean_name])
             var = var.reshape(aux_shape_dic[var_name])
             aux_params[mean_name] = mx.nd.zeros(mean.shape)
             aux_params[var_name] = mx.nd.zeros(var.shape)
-            arg_params[maf_name] = mx.nd.zeros(moving_average_factor.shape)
-            aux_params[mean_name][:] = mean
-            aux_params[var_name][:] = var
-            arg_params[maf_name][:] = moving_average_factor
+            # Get the original epsilon
+            for idx, layer in enumerate(layers_proto):
+                if layer.name == bn_name:
+                    bn_index = idx
+            eps_caffe = layers_proto[bn_index].batch_norm_param.eps
+            # Compensate for the epsilon shift performed in convert_symbol
+            eps_symbol = float( sym.attr_dict()[bn_name + '_moving_mean']['eps'] )
+            eps_correction = eps_caffe - eps_symbol
+            # Fill parameters
+            aux_params[mean_name][:] = mean * rescale_factor
+            aux_params[var_name][:] = var * rescale_factor + eps_correction
             assert var.flags['C_CONTIGUOUS'] is True
             assert mean.flags['C_CONTIGUOUS'] is True
             print ('converting batchnorm layer, mean shape = {}, var shape = {}'.format(mean.shape, var.shape))
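
For context on the rescale_factor handling above: Caffe's BatchNorm layer keeps its running statistics in unnormalized form, with blobs[0] and blobs[1] holding the accumulated mean and variance and blobs[2] holding the accumulation (scale) factor, and Caffe treats a zero factor as zero rather than dividing by it. A self-contained sketch of that recovery step, using made-up values in place of the converter's layer_blobs:

    import numpy as np

    # Hypothetical stand-ins for layer_blobs[0..2] of a Caffe BatchNorm layer.
    mean_blob = np.array([0.5, -1.2, 0.3])   # accumulated (scaled) running mean
    var_blob = np.array([0.9, 0.4, 1.1])     # accumulated (scaled) running variance
    scale_blob = np.array([20.0])            # accumulation / scale factor

    rescale = 0.0 if scale_blob[0] == 0 else 1.0 / scale_blob[0]
    moving_mean = mean_blob * rescale        # true running mean
    moving_var = var_blob * rescale          # true running variance (before the eps correction)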
tools/caffe_converter/convert_symbol.py: 7 additions & 1 deletion
@@ -155,7 +155,13 @@ def _parse_proto(prototxt_fname):
         if layer[i].type == 'BatchNorm':
             type_string = 'mx.symbol.BatchNorm'
             param = layer[i].batch_norm_param
-            param_string = 'use_global_stats=%s, fix_gamma=False' % param.use_global_stats
+            # CuDNN requires eps to be greater than 1e-05
+            # We compensate for this change in convert_model
+            epsilon = param.eps
+            if(epsilon <= 1e-05):
+                epsilon = 1e-04
+            param_string = 'use_global_stats=%s, fix_gamma=False, eps=%f' % (
+                param.use_global_stats, epsilon)
             need_flatten[name] = need_flatten[mapping[layer[i].bottom[0]]]
         if layer[i].type == 'Scale':
             assert layer[i-1].type == 'BatchNorm'
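
To see what the new convert_symbol branch actually emits, here is a small standalone reproduction of the clipping and formatting above; the SimpleNamespace is a made-up stand-in for the parsed layer[i].batch_norm_param:

    from types import SimpleNamespace

    # Hypothetical stand-in for the prototxt's batch_norm_param.
    param = SimpleNamespace(use_global_stats=True, eps=1e-05)

    # Mirror of the hunk above: eps values at or below 1e-05 are raised to 1e-04,
    # and convert_model later corrects the stored variance for this shift.
    epsilon = param.eps
    if epsilon <= 1e-05:
        epsilon = 1e-04
    param_string = 'use_global_stats=%s, fix_gamma=False, eps=%f' % (
        param.use_global_stats, epsilon)

    print(param_string)  # use_global_stats=True, fix_gamma=False, eps=0.000100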
