You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
When training with two GPUs, the error is as follows: ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[512,512,50,14]
#32
Traceback (most recent call last):
File "train.py", line 196, in <module>
train()
File "train.py", line 176, in train
_, global_stepnp, summary_str = sess.run([train_op, global_step, summary_op])
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 789, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 997, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1132, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1152, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.ResourceExhaustedError: OOM when allocating tensor with shape[512,512,50,14]
[[Node: Fast-RCNN/resnet_v1_101/block4/unit_1/bottleneck_v1/conv2/convolution = Conv2D[T=DT_FLOAT, data_format="NHWC", padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/gpu:0"](Fast-RCNN/resnet_v1_101/block4/unit_1/bottleneck_v1/conv1/Relu, Fast-RCNN/resnet_v1_101/block4/unit_1/bottleneck_v1/conv2/weights/read)]]
[[Node: Momentum/update/_1298 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_11317_Momentum/update", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op u'Fast-RCNN/resnet_v1_101/block4/unit_1/bottleneck_v1/conv2/convolution', defined at:
File "train.py", line 196, in <module>
train()
File "train.py", line 55, in train
gtboxes_batch=gtboxes_and_label)
File "../libs/networks/build_whole_network.py", line 464, in build_whole_detection_network
img_shape=img_shape)
File "../libs/networks/build_whole_network.py", line 211, in build_fastrcnn
scope=self.base_network_name)
File "../libs/networks/resnet.py", line 178, in restnet_head
include_root_block=False, scope=scope)
File "../libs/networks/slim_nets/resnet_v1.py", line 196, in resnet_v1
net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 181, in func_with_args
return func(*args, **current_args)
File "../libs/networks/slim_nets/resnet_utils.py", line 189, in stack_blocks_dense
net = block.unit_fn(net, rate=1, **unit)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 181, in func_with_args
return func(*args, **current_args)
File "../libs/networks/slim_nets/resnet_v1.py", line 104, in bottleneck
rate=rate, scope='conv2')
File "../libs/networks/slim_nets/resnet_utils.py", line 113, in conv2d_same
padding='SAME', scope=scope)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 181, in func_with_args
return func(*args, **current_args)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/layers/python/layers/layers.py", line 947, in convolution
outputs = layer.apply(inputs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/base.py", line 492, in apply
return self.__call__(inputs, *args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/base.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/convolutional.py", line 158, in call
data_format=utils.convert_data_format(self.data_format, self.rank + 2))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_ops.py", line 670, in convolution
op=op)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_ops.py", line 338, in with_space_to_batch
return op(input, num_spatial_dims, padding)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_ops.py", line 662, in op
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_ops.py", line 131, in _non_atrous_convolution
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_nn_ops.py", line 399, in conv2d
data_format=data_format, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2506, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1269, in __init__
self._traceback = _extract_stack()
Traceback (most recent call last):
File "train.py", line 196, in <module>
train()
File "train.py", line 176, in train
_, global_stepnp, summary_str = sess.run([train_op, global_step, summary_op])
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 789, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 997, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1132, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 1152, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.ResourceExhaustedError: OOM when allocating tensor with shape[512,512,50,14]
[[Node: Fast-RCNN/resnet_v1_101/block4/unit_1/bottleneck_v1/conv2/convolution = Conv2D[T=DT_FLOAT, data_format="NHWC", padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/gpu:0"](Fast-RCNN/resnet_v1_101/block4/unit_1/bottleneck_v1/conv1/Relu, Fast-RCNN/resnet_v1_101/block4/unit_1/bottleneck_v1/conv2/weights/read)]]
[[Node: Momentum/update/_1298 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_11317_Momentum/update", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op u'Fast-RCNN/resnet_v1_101/block4/unit_1/bottleneck_v1/conv2/convolution', defined at:
File "train.py", line 196, in <module>
train()
File "train.py", line 55, in train
gtboxes_batch=gtboxes_and_label)
File "../libs/networks/build_whole_network.py", line 464, in build_whole_detection_network
img_shape=img_shape)
File "../libs/networks/build_whole_network.py", line 211, in build_fastrcnn
scope=self.base_network_name)
File "../libs/networks/resnet.py", line 178, in restnet_head
include_root_block=False, scope=scope)
File "../libs/networks/slim_nets/resnet_v1.py", line 196, in resnet_v1
net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 181, in func_with_args
return func(*args, **current_args)
File "../libs/networks/slim_nets/resnet_utils.py", line 189, in stack_blocks_dense
net = block.unit_fn(net, rate=1, **unit)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 181, in func_with_args
return func(*args, **current_args)
File "../libs/networks/slim_nets/resnet_v1.py", line 104, in bottleneck
rate=rate, scope='conv2')
File "../libs/networks/slim_nets/resnet_utils.py", line 113, in conv2d_same
padding='SAME', scope=scope)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 181, in func_with_args
return func(*args, **current_args)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/layers/python/layers/layers.py", line 947, in convolution
outputs = layer.apply(inputs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/base.py", line 492, in apply
return self.__call__(inputs, *args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/base.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/layers/convolutional.py", line 158, in call
data_format=utils.convert_data_format(self.data_format, self.rank + 2))
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_ops.py", line 670, in convolution
op=op)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_ops.py", line 338, in with_space_to_batch
return op(input, num_spatial_dims, padding)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_ops.py", line 662, in op
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_ops.py", line 131, in _non_atrous_convolution
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_nn_ops.py", line 399, in conv2d
data_format=data_format, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2506, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1269, in __init__
self._traceback = _extract_stack()
ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[512,512,50,14]
[[Node: Fast-RCNN/resnet_v1_101/block4/unit_1/bottleneck_v1/conv2/convolution = Conv2D[T=DT_FLOAT, data_format="NHWC", padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/gpu:0"](Fast-RCNN/resnet_v1_101/block4/unit_1/bottleneck_v1/conv1/Relu, Fast-RCNN/resnet_v1_101/block4/unit_1/bottleneck_v1/conv2/weights/read)]]
[[Node: Momentum/update/_1298 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_11317_Momentum/update", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
The text was updated successfully, but these errors were encountered: