Potential tf.keras fix #26
Comments
Dear sir, how do I fix the issue below? @sinclairnick
Here is my code:

@sinclairnick The code is not working; I am still getting the error. Any comments on how to solve it? Would you please share your main function and model?
Not sure if I got it right, but I think I might have fixed it. Here's the modified code:

from keras.engine.topology import Layer
import keras.backend as K


class SpatialPyramidPooling(Layer):
    """Spatial pyramid pooling layer for 2D inputs.

    See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition,
    K. He, X. Zhang, S. Ren, J. Sun

    # Arguments
        pool_list: list of int
            List of pooling regions to use. The length of the list is the number of pooling regions,
            each int in the list is the number of regions in that pool. For example [1, 2, 4] would be 3
            regions with 1x1, 2x2 and 4x4 max pools, so 21 outputs per feature map.

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if dim_ordering='th'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if dim_ordering='tf'.

    # Output shape
        2D tensor with shape:
        `(samples, channels * sum([i * i for i in pool_list]))`
    """

    def __init__(self, pool_list, **kwargs):
        self.dim_ordering = K.image_data_format()
        assert self.dim_ordering in {'channels_last', 'channels_first'}, \
            'dim_ordering must be channels_last or channels_first'
        self.pool_list = pool_list
        self.num_outputs_per_channel = sum([i * i for i in pool_list])
        super(SpatialPyramidPooling, self).__init__(**kwargs)

    def build(self, input_shape):
        if self.dim_ordering == 'channels_first':
            self.nb_channels = input_shape[1]
        elif self.dim_ordering == 'channels_last':
            self.nb_channels = input_shape[3]

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.nb_channels * self.num_outputs_per_channel)

    def get_config(self):
        config = {'pool_list': self.pool_list}
        base_config = super(SpatialPyramidPooling, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def call(self, x, mask=None):
        input_shape = K.shape(x)

        if self.dim_ordering == 'channels_first':
            num_rows = input_shape[2]
            num_cols = input_shape[3]
        elif self.dim_ordering == 'channels_last':
            num_rows = input_shape[1]
            num_cols = input_shape[2]

        # Size of each pooling bin for every pyramid level, as float tensors.
        row_length = [K.cast(num_rows, 'float32') / i for i in self.pool_list]
        col_length = [K.cast(num_cols, 'float32') / i for i in self.pool_list]

        outputs = []

        if self.dim_ordering == 'channels_first':
            for pool_num, num_pool_regions in enumerate(self.pool_list):
                for jy in range(num_pool_regions):
                    for ix in range(num_pool_regions):
                        # Bin boundaries, rounded to integer pixel indices.
                        x1 = ix * col_length[pool_num]
                        x2 = ix * col_length[pool_num] + col_length[pool_num]
                        y1 = jy * row_length[pool_num]
                        y2 = jy * row_length[pool_num] + row_length[pool_num]

                        x1 = K.cast(K.round(x1), 'int32')
                        x2 = K.cast(K.round(x2), 'int32')
                        y1 = K.cast(K.round(y1), 'int32')
                        y2 = K.cast(K.round(y2), 'int32')

                        new_shape = [input_shape[0], input_shape[1],
                                     y2 - y1, x2 - x1]

                        x_crop = x[:, :, y1:y2, x1:x2]
                        xm = K.reshape(x_crop, new_shape)
                        pooled_val = K.max(xm, axis=(2, 3))
                        outputs.append(pooled_val)

        elif self.dim_ordering == 'channels_last':
            for pool_num, num_pool_regions in enumerate(self.pool_list):
                for jy in range(num_pool_regions):
                    for ix in range(num_pool_regions):
                        # Bin boundaries, rounded to integer pixel indices.
                        x1 = ix * col_length[pool_num]
                        x2 = ix * col_length[pool_num] + col_length[pool_num]
                        y1 = jy * row_length[pool_num]
                        y2 = jy * row_length[pool_num] + row_length[pool_num]

                        x1 = K.cast(K.round(x1), 'int32')
                        x2 = K.cast(K.round(x2), 'int32')
                        y1 = K.cast(K.round(y1), 'int32')
                        y2 = K.cast(K.round(y2), 'int32')

                        new_shape = [input_shape[0], y2 - y1,
                                     x2 - x1, input_shape[3]]

                        x_crop = x[:, y1:y2, x1:x2, :]
                        xm = K.reshape(x_crop, new_shape)
                        pooled_val = K.max(xm, axis=(1, 2))
                        outputs.append(pooled_val)

        if self.dim_ordering == 'channels_first':
            outputs = K.concatenate(outputs)
        elif self.dim_ordering == 'channels_last':
            # outputs = K.concatenate(outputs, axis=1)
            outputs = K.concatenate(outputs)
            # outputs = K.reshape(outputs, (len(self.pool_list), self.num_outputs_per_channel, input_shape[0], input_shape[1]))
            outputs = K.reshape(outputs, self.compute_output_shape(input_shape))
            # outputs = K.permute_dimensions(outputs, (3, 1, 0, 2))
            # outputs = K.reshape(outputs, (input_shape[0], self.num_outputs_per_channel * self.nb_channels))

        return outputs
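In case it helps anyone trying this out, here is a minimal usage sketch (not from this thread; the convolution widths, pool_list and the 10-class head are illustrative assumptions, and a channels-last Keras configuration is assumed) showing how the layer above might be dropped into a small model that accepts variable-sized images:

```python
# Minimal sketch, assuming the SpatialPyramidPooling layer defined above and
# K.image_data_format() == 'channels_last'. Filter counts, pool_list and the
# 10-class Dense head are illustrative assumptions.
from keras.models import Model
from keras.layers import Input, Conv2D, Dense

inputs = Input(shape=(None, None, 3))                        # variable height/width, 3 channels
x = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
x = SpatialPyramidPooling([1, 2, 4])(x)                      # 64 * (1 + 4 + 16) = 1344 features
outputs = Dense(10, activation='softmax')(x)

model = Model(inputs, outputs)
model.compile(optimizer='adam', loss='categorical_crossentropy')
model.summary()
```

The point of the SPP layer here is that the Dense head sees a fixed 1344-feature vector regardless of the input image size, since compute_output_shape reports channels * sum([i * i for i in pool_list]).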
I tried the following version, with the channel dimension in the last position (batch_size, height, width, channels), and it worked. However, before applying the output layer (Dense + Softmax) I applied Layer Normalization with GeLU activation, because otherwise it was giving me NaN in the loss function. The code used is:

import tensorflow as tf
from tensorflow.keras import layers


# taken from https://github.com/yhenon/keras-spp
class SpatialPyramidPooling(layers.Layer):
    """Spatial pyramid pooling layer for 2D inputs.

    See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition,
    K. He, X. Zhang, S. Ren, J. Sun

    # Arguments
        pool_list: list of int
            List of pooling regions to use. The length of the list is the number of pooling regions,
            each int in the list is the number of regions in that pool. For example [1, 2, 4] would be 3
            regions with 1x1, 2x2 and 4x4 max pools, so 21 outputs per feature map.

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if dim_ordering='th'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if dim_ordering='tf'.

    # Output shape
        2D tensor with shape:
        `(samples, channels * sum([i * i for i in pool_list]))`
    """

    def __init__(self, pool_list, **kwargs):
        super(SpatialPyramidPooling, self).__init__(**kwargs)
        self.pool_list = pool_list
        self.num_outputs_per_channel = sum([i * i for i in pool_list])

    def build(self, input_shape):
        # channels-last input: (batch, rows, cols, channels)
        self.nb_channels = input_shape[3]

    def get_config(self):
        config = {'pool_list': self.pool_list}
        base_config = super(SpatialPyramidPooling, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def call(self, x, mask=None):
        input_shape = tf.shape(x)
        num_rows = input_shape[1]
        num_cols = input_shape[2]

        # Size of each pooling bin for every pyramid level.
        row_length = [tf.cast(num_rows, 'float32') / i for i in self.pool_list]
        col_length = [tf.cast(num_cols, 'float32') / i for i in self.pool_list]

        outputs = []
        for pool_num, num_pool_regions in enumerate(self.pool_list):
            for jy in range(num_pool_regions):
                for ix in range(num_pool_regions):
                    # Bin boundaries, rounded to integer pixel indices.
                    x1 = ix * col_length[pool_num]
                    x2 = ix * col_length[pool_num] + col_length[pool_num]
                    y1 = jy * row_length[pool_num]
                    y2 = jy * row_length[pool_num] + row_length[pool_num]

                    x1 = tf.cast(tf.round(x1), dtype=tf.int32)
                    x2 = tf.cast(tf.round(x2), dtype=tf.int32)
                    y1 = tf.cast(tf.round(y1), dtype=tf.int32)
                    y2 = tf.cast(tf.round(y2), dtype=tf.int32)

                    new_shape = [input_shape[0], y2 - y1, x2 - x1, input_shape[3]]

                    x_crop = x[:, y1:y2, x1:x2, :]
                    xm = tf.reshape(tensor=x_crop, shape=new_shape)
                    pooled_val = tf.reduce_max(xm, axis=(1, 2))
                    outputs.append(pooled_val)

        outputs = tf.concat(outputs, axis=1)
        return outputs
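For reference, a rough sketch of the kind of output head described at the start of this comment, with Layer Normalization and GeLU before the final Dense + Softmax (assumptions: the tf.keras SpatialPyramidPooling defined just above, channels-last inputs, illustrative layer widths and a 10-class output). Because this version of the layer builds its output from dynamic shapes and declares no static feature size, the sketch pins the flattened dimension with a Reshape so that LayerNormalization and Dense can build:

```python
# Minimal sketch, assuming the tf.keras SpatialPyramidPooling defined above.
# Layer widths and the 10-class output are illustrative assumptions.
import tensorflow as tf
from tensorflow.keras import layers, Model

pool_list = [1, 2, 4]                                   # 1 + 4 + 16 = 21 bins per channel
inputs = layers.Input(shape=(None, None, 3))
x = layers.Conv2D(32, 3, padding='same', activation='relu')(inputs)
x = layers.Conv2D(64, 3, padding='same', activation='relu')(x)
x = SpatialPyramidPooling(pool_list)(x)
x = layers.Reshape((64 * 21,))(x)                       # pin the static feature size: channels * 21
x = layers.LayerNormalization()(x)                      # normalization step reported to avoid NaN loss
x = layers.Dense(128, activation='gelu')(x)             # GeLU activation as described (TF >= 2.4)
outputs = layers.Dense(10, activation='softmax')(x)

model = Model(inputs, outputs)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
model.summary()
```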
Posting this for future people who may run into the issues I did.
I couldn't get the original implementation to work properly for me using tf.keras. Hence, I made the necessary modifications to produce the correct output shapes. Currently the output shape is defined as …, and the code I used to achieve this is: …
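As a rough, hypothetical sketch of what such a modification might look like (an assumption, not the code originally posted in this issue), one way to expose the fixed output size under tf.keras is to pin the static feature dimension, assuming the channels-last SpatialPyramidPooling from the earlier comment:

```python
# Hypothetical sketch only -- not the snippet originally posted. It shows one way
# to declare the flattened output size under tf.keras so that downstream Dense
# layers can build, assuming the channels-last SpatialPyramidPooling above.
import tensorflow as tf

class SpatialPyramidPoolingWithShape(SpatialPyramidPooling):  # hypothetical name
    def call(self, x, mask=None):
        outputs = super().call(x, mask=mask)
        channels = x.shape[-1]                                # static channel count, if known
        if channels is not None:
            # channels * sum(i * i for i in pool_list) pooled values per sample
            outputs = tf.ensure_shape(
                outputs, [None, channels * self.num_outputs_per_channel])
        return outputs

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[3] * self.num_outputs_per_channel)
```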