Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fp16 training for ResNeXt101 #4947

Open
wants to merge 2 commits into
base: dev-static
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 19 additions & 18 deletions PaddleCV/image_classification/build_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,16 @@ def _calc_label_smoothing_loss(softmax_out, label, class_dim, epsilon):
def _basic_model(data, model, args, is_train):
image = data[0]
label = data[1]
if args.model == "ResNet50":
image_in = fluid.layers.transpose(
image, [0, 2, 3, 1]) if args.data_format == 'NHWC' else image
image_in.stop_gradient = image.stop_gradient
net_out = model.net(input=image_in,
class_dim=args.class_dim,
data_format=args.data_format)
else:
net_out = model.net(input=image, class_dim=args.class_dim)
print ("args.data_format:", args.data_format)
# if args.model in ("ResNet50", "ResNeXt101_32x4d"):
image_in = fluid.layers.transpose(
image, [0, 2, 3, 1]) if args.data_format == 'NHWC' else image
image_in.stop_gradient = image.stop_gradient
net_out = model.net(input=image_in,
class_dim=args.class_dim,
data_format=args.data_format)
# else:
# net_out = model.net(input=image, class_dim=args.class_dim)
softmax_out = fluid.layers.softmax(net_out, use_cudnn=False)

if is_train and args.use_label_smoothing:
Expand Down Expand Up @@ -95,15 +96,15 @@ def _mixup_model(data, model, args, is_train):
y_b = data[2]
lam = data[3]

if args.model == "ResNet50":
image_in = fluid.layers.transpose(
image, [0, 2, 3, 1]) if args.data_format == 'NHWC' else image
image_in.stop_gradient = image.stop_gradient
net_out = model.net(input=image_in,
class_dim=args.class_dim,
data_format=args.data_format)
else:
net_out = model.net(input=image, class_dim=args.class_dim)
# if args.model == "ResNet50":
image_in = fluid.layers.transpose(
image, [0, 2, 3, 1]) if args.data_format == 'NHWC' else image
image_in.stop_gradient = image.stop_gradient
net_out = model.net(input=image_in,
class_dim=args.class_dim,
data_format=args.data_format)
# else:
# net_out = model.net(input=image, class_dim=args.class_dim)
softmax_out = fluid.layers.softmax(net_out, use_cudnn=False)
if not args.use_label_smoothing:
loss_a = fluid.layers.cross_entropy(input=softmax_out, label=y_a)
Expand Down
45 changes: 29 additions & 16 deletions PaddleCV/image_classification/models/resnext.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def __init__(self, layers=50, cardinality=64):
self.layers = layers
self.cardinality = cardinality

def net(self, input, class_dim=1000):
def net(self, input, class_dim=1000, data_format="NCHW"):
layers = self.layers
cardinality = self.cardinality
supported_layers = [50, 101, 152]
Expand All @@ -56,13 +56,15 @@ def net(self, input, class_dim=1000):
filter_size=7,
stride=2,
act='relu',
name="res_conv1") #debug
name="res_conv1", #debug
data_format=data_format)
conv = fluid.layers.pool2d(
input=conv,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
pool_type='max',
data_format=data_format)

for block in range(len(depth)):
for i in range(depth[block]):
Expand All @@ -79,10 +81,11 @@ def net(self, input, class_dim=1000):
if cardinality == 64 else num_filters2[block],
stride=2 if i == 0 and block != 0 else 1,
cardinality=cardinality,
name=conv_name)
name=conv_name,
data_format=data_format)

pool = fluid.layers.pool2d(
input=conv, pool_type='avg', global_pooling=True)
input=conv, pool_type='avg', global_pooling=True, data_format=data_format)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
out = fluid.layers.fc(
input=pool,
Expand All @@ -100,7 +103,8 @@ def conv_bn_layer(self,
stride=1,
groups=1,
act=None,
name=None):
name=None,
data_format='NCHW'):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
Expand All @@ -111,7 +115,8 @@ def conv_bn_layer(self,
act=None,
param_attr=ParamAttr(name=name + "_weights"),
bias_attr=False,
name=name + '.conv2d.output.1')
name=name + '.conv2d.output.1',
data_format=data_format)
if name == "conv1":
bn_name = "bn_" + name
else:
Expand All @@ -123,43 +128,51 @@ def conv_bn_layer(self,
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance', )
moving_variance_name=bn_name + '_variance',
data_layout=data_format)

def shortcut(self, input, ch_out, stride, name):
ch_in = input.shape[1]
def shortcut(self, input, ch_out, stride, name, data_format):
if data_format == "NCHW":
ch_in = input.shape[1]
else:
ch_in = input.shape[-1]
if ch_in != ch_out or stride != 1:
return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
return self.conv_bn_layer(input, ch_out, 1, stride, name=name, data_format=data_format)
else:
return input

def bottleneck_block(self, input, num_filters, stride, cardinality, name):
def bottleneck_block(self, input, num_filters, stride, cardinality, name, data_format):
cardinality = self.cardinality
conv0 = self.conv_bn_layer(
input=input,
num_filters=num_filters,
filter_size=1,
act='relu',
name=name + "_branch2a")
name=name + "_branch2a",
data_format=data_format)
conv1 = self.conv_bn_layer(
input=conv0,
num_filters=num_filters,
filter_size=3,
stride=stride,
groups=cardinality,
act='relu',
name=name + "_branch2b")
name=name + "_branch2b",
data_format=data_format)
conv2 = self.conv_bn_layer(
input=conv1,
num_filters=num_filters if cardinality == 64 else num_filters * 2,
filter_size=1,
act=None,
name=name + "_branch2c")
name=name + "_branch2c",
data_format=data_format)

short = self.shortcut(
input,
num_filters if cardinality == 64 else num_filters * 2,
stride,
name=name + "_branch1")
name=name + "_branch1",
data_format=data_format)

return fluid.layers.elementwise_add(
x=short, y=conv2, act='relu', name=name + ".add.output.5")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/bin/bash -ex
# Benchmark/profiling launcher: runs train.py for ResNeXt101_32x4d under nvprof.
#
# Each tunable below keeps its original value as the default and may be
# overridden from the environment instead of editing this file, e.g.:
#   CUDA_VISIBLE_DEVICES=0 DATA_DIR=/data/ILSVRC2012 USE_FP16=true bash <this script>

# GPU to run on (default: device 5, as in the original script).
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-5}"
export FLAGS_conv_workspace_size_limit=4000 #MB
export FLAGS_cudnn_exhaustive_search=0
export FLAGS_cudnn_batchnorm_spatial_persistent=1

# Root directory of the ImageNet (ILSVRC2012) dataset.
DATA_DIR="${DATA_DIR:-/ssd3/datasets/ILSVRC2012}"

DATA_FORMAT="${DATA_FORMAT:-NHWC}"
USE_FP16="${USE_FP16:-false}" #whether to use float16
USE_DALI="${USE_DALI:-true}"
USE_ADDTO="${USE_ADDTO:-true}"

# Compare against the literal string "true" rather than executing the
# variable's contents as a command.
if [ "${USE_ADDTO}" = "true" ]; then
    export FLAGS_max_inplace_grad_add=8
fi

if [ "${USE_DALI}" = "true" ]; then
    export FLAGS_fraction_of_gpu_memory_to_use=0.8
fi

# NOTE(review): "--profile-from-start off" makes nvprof record nothing until
# the application itself starts the profiler. Confirm that train.py actually
# calls fluid.core.nvprof_start()/nvprof_stop(); otherwise timeline_output
# will be empty.
nvprof -o timeline_output -f --cpu-profiling off --profile-from-start off python train.py \
    --model=ResNeXt101_32x4d \
    --data_dir="${DATA_DIR}" \
    --batch_size=32 \
    --total_images=1281167 \
    --image_shape 4 224 224 \
    --class_dim=1000 \
    --print_step=10 \
    --model_save_dir=output/ \
    --lr_strategy=piecewise_decay \
    --use_fp16="${USE_FP16}" \
    --scale_loss=128.0 \
    --use_dynamic_loss_scaling=true \
    --data_format="${DATA_FORMAT}" \
    --fuse_elewise_add_act_ops=true \
    --fuse_bn_act_ops=true \
    --fuse_bn_add_act_ops=true \
    --enable_addto="${USE_ADDTO}" \
    --validate=true \
    --is_profiler=false \
    --profiler_path=profile/ \
    --reader_thread=10 \
    --reader_buf_size=4000 \
    --use_dali="${USE_DALI}" \
    --lr=0.1


6 changes: 3 additions & 3 deletions PaddleCV/image_classification/scripts/train/ResNet50_fp16.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#!/bin/bash -ex

export CUDA_VISIBLE_DEVICES=4
export FLAGS_conv_workspace_size_limit=4000 #MB
export FLAGS_cudnn_exhaustive_search=1
export FLAGS_cudnn_batchnorm_spatial_persistent=1


DATA_DIR="Your image dataset path, e.g. /work/datasets/ILSVRC2012/"
DATA_DIR="/ssd3/datasets/ILSVRC2012"

DATA_FORMAT="NHWC"
USE_FP16=true #whether to use float16
Expand All @@ -23,7 +23,7 @@ fi
python train.py \
--model=ResNet50 \
--data_dir=${DATA_DIR} \
--batch_size=256 \
--batch_size=128 \
--total_images=1281167 \
--image_shape 4 224 224 \
--class_dim=1000 \
Expand Down
13 changes: 13 additions & 0 deletions PaddleCV/image_classification/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,8 +264,18 @@ def train(args):
test_iter = test_data_loader()

batch_start = time.time()
ips_avg = []
for batch in train_iter:
#NOTE: this is for benchmark

# if total_batch_num == 200:
# fluid.core.nvprof_start()
# if total_batch_num == 210:
# fluid.core.nvprof_stop()
if total_batch_num == 200:
print(">>>>>>>>>>>>>>>>>>>>>>>>>> Average ips: ", np.mean(ips_avg),">>>>>>>>>>>>>>>>>>>>>>>>")
#fluid.core.nvprof_stop()
return
if args.max_iter and total_batch_num == args.max_iter:
return
reader_cost_averager.record(time.time() - batch_start)
Expand All @@ -285,6 +295,9 @@ def train(args):

if trainer_id == 0:
ips = float(args.batch_size) / batch_cost_averager.get_average()
if total_batch_num > 40:
ips_avg.append(ips)

print_info(
"batch",
train_batch_metrics_avg,
Expand Down
3 changes: 2 additions & 1 deletion PaddleCV/image_classification/utils/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,8 @@ def check_version():
"Please make sure the version is good with your code." \

try:
fluid.require_version('1.6.0')
a = 1
#fluid.require_version('1.6.0')
except Exception as e:
logger.error(err)
sys.exit(1)
Expand Down