Skip to content
This repository has been archived by the owner on Jun 13, 2022. It is now read-only.

Commit

Permalink
Merge pull request #27 from joristaglio/master
Browse files Browse the repository at this point in the history
Fixed batch processing/multi-column testing, deprecated unused parameters
  • Loading branch information
joristaglio authored Mar 1, 2018
2 parents c44e1cb + 9206ef8 commit ea8433f
Show file tree
Hide file tree
Showing 36 changed files with 230 additions and 208 deletions.
23 changes: 22 additions & 1 deletion .pytest_cache/v/cache/lastfailed
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,26 @@
"tests/image_featurizer_test.py::test_load_and_featurize_data_multiple_columns[squeezenet]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_data_multiple_columns[vgg16]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_data_multiple_columns[vgg19]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_data_multiple_columns[xception]": true
"tests/image_featurizer_test.py::test_load_and_featurize_data_multiple_columns[xception]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_data_multiple_columns_no_batch_processing[inceptionv3]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_data_multiple_columns_no_batch_processing[resnet50]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_data_multiple_columns_no_batch_processing[xception]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_data_multiple_columns_with_batch_processing[inceptionv3]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_data_multiple_columns_with_batch_processing[resnet50]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_data_multiple_columns_with_batch_processing[squeezenet]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_data_multiple_columns_with_batch_processing[vgg16]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_data_multiple_columns_with_batch_processing[vgg19]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_data_multiple_columns_with_batch_processing[xception]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_single_column_no_batch_processing[inceptionv3]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_single_column_no_batch_processing[resnet50]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_single_column_no_batch_processing[squeezenet]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_single_column_no_batch_processing[vgg16]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_single_column_no_batch_processing[vgg19]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_single_column_no_batch_processing[xception]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_single_column_with_batch_processing[inceptionv3]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_single_column_with_batch_processing[resnet50]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_single_column_with_batch_processing[squeezenet]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_single_column_with_batch_processing[vgg16]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_single_column_with_batch_processing[vgg19]": true,
"tests/image_featurizer_test.py::test_load_and_featurize_single_column_with_batch_processing[xception]": true
}
2 changes: 1 addition & 1 deletion pic2vec/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
_initialize_model, _check_downsampling_mismatch,
build_featurizer)

from pic2vec.feature_preprocessing import (_create_csv_with_image_paths, # NOQA
from pic2vec.feature_preprocessing import (_create_df_with_image_paths, # NOQA
_find_directory_image_paths,
_find_csv_image_paths,
_find_combined_image_paths,
Expand Down
24 changes: 7 additions & 17 deletions pic2vec/data_featurizing.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def _named_path_finder(csv_name, model_str, model_depth, model_output,
return named_path


def _create_features_df_helper(data_array, full_feature_array, image_column_header, df):
def _create_features_df_helper(data_array, full_feature_array, image_column_header):
# Log how many photos are missing or blank:
zeros_index = [np.count_nonzero(array_slice) == 0 for array_slice in data_array[:]]
logging.info('Number of missing photos: {}'.format(len(zeros_index)))
Expand All @@ -128,13 +128,12 @@ def _create_features_df_helper(data_array, full_feature_array, image_column_head
df_missing = pd.DataFrame(data=zeros_index, columns=missing_column_header)

# Create the full combined csv+features dataframe
df_full = pd.concat([df, df_missing, df_features], axis=1)
df_features_full = pd.concat([df_missing, df_features], axis=1)

return df_full, df_features
return df_features_full


def create_features(data_array, new_feature_array, df_prev, image_column_header,
image_list, continued_column=False, df_features_prev=pd.DataFrame(),
def create_features(data_array, new_feature_array, image_column_header,
save_features=False):
"""
Write the feature array to a new csv, and append the features to the appropriate
Expand Down Expand Up @@ -166,12 +165,6 @@ def create_features(data_array, new_feature_array, df_prev, image_column_header,

# -------------- #
# ERROR CHECKING #

# Raise error if the image_column_header is not in the csv
if image_column_header not in df_prev.columns:
raise ValueError('Must pass the name of the column where the images are '
'stored in the csv. The column passed was not in the csv.')

# Raise error if the data array has the wrong shape
if len(data_array.shape) != 4:
raise ValueError('Data array must be 4D array, with shape: [batch, height, width, channel].'
Expand All @@ -185,11 +178,8 @@ def create_features(data_array, new_feature_array, df_prev, image_column_header,

logging.info('Adding image features to csv.')

df_full, df_features = _create_features_df_helper(data_array, new_feature_array,
image_column_header, df_prev)

if continued_column and save_features:
df_features = pd.concat([df_features_prev, df_features], axis=1)
df_features = _create_features_df_helper(data_array, new_feature_array,
image_column_header)

# Return the full combined dataframe
return df_full, df_features
return df_features
14 changes: 4 additions & 10 deletions pic2vec/feature_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
}


def _create_csv_with_image_paths(list_of_images, new_csv_name, image_column_header, save_csv):
def _create_df_with_image_paths(list_of_images, image_column_header):
"""
Take in a list of image names, and create a new csv file where each
image name is a new row.
Expand All @@ -86,11 +86,6 @@ def _create_csv_with_image_paths(list_of_images, new_csv_name, image_column_head
"""
df = pd.DataFrame(list_of_images, columns=[image_column_header])

if save_csv:
_create_csv_path(new_csv_name)
df.to_csv(new_csv_name, index=False)

return df


Expand Down Expand Up @@ -244,7 +239,7 @@ def _find_combined_image_paths(image_path, csv_path, image_column_header):
return list_of_images, df


def _image_paths_finder(image_path, csv_path, image_column_header, new_csv_name, save_csv):
def _image_paths_finder(image_path, csv_path, image_column_header, new_csv_name):
"""
Given an image column header, and either a csv path or an image directory,
find the list of image paths. If just a csv, it's pulled from the column.
Expand Down Expand Up @@ -278,9 +273,8 @@ def _image_paths_finder(image_path, csv_path, image_column_header, new_csv_name,
list_of_images = _find_directory_image_paths(image_path)

# Create the new csv in a folder called 'featurizer_csv/'
df = _create_csv_with_image_paths(list_of_images, new_csv_name=new_csv_name,
image_column_header=image_column_header,
save_csv=save_csv)
df = _create_df_with_image_paths(list_of_images,
image_column_header=image_column_header)

logging.warning('Created csv from directory. Stored at {}'.format(new_csv_name))

Expand Down
53 changes: 24 additions & 29 deletions pic2vec/image_featurizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,7 @@ def load_data(self,
csv_path='',
new_csv_name='featurizer_csv/generated_images_csv',
grayscale=False,
save_data=True,
save_csv=False
save_data=True
# crop_size = (299, 299),
# number_crops = 0,
# random_crop = False,
Expand Down Expand Up @@ -245,13 +244,13 @@ def load_data(self,
raise ValueError('If building the csv from an image directory, the featurizer can '
'only create a single image column. If two image columns are '
'needed, please create a csv to pass in.')
_create_csv_path(new_csv_name)

# If the image_dict hasn't been passed in (which only happens in batch processing),
# build the full image dict and save the original dataframe
if not image_dict:
image_dict, df = self._full_image_dict_finder(image_path, csv_path, image_column_headers,
new_csv_name, save_csv)
image_dict, df = self._full_image_dict_finder(image_path, csv_path,
image_column_headers,
new_csv_name)
self.df_original = df
self.full_dataframe = df
self.image_column_headers = image_column_headers
Expand All @@ -268,7 +267,6 @@ def load_data(self,
self.csv_path = csv_path
self.image_path = image_path
self.scaled_size = scaled_size

return full_image_data

@t.guard(batch_data=t.Type(np.ndarray),
Expand Down Expand Up @@ -318,13 +316,13 @@ def featurize(self, batch_data=np.zeros((1)), image_column_headers='',
if batch_processing:
assert len(image_column_headers) == 1 or isinstance(image_column_headers, str)
else:
assert len(image_column_headers) == self.data.shape[0]
assert len(image_column_headers) == batch_data.shape[0]
logging.info("Trying to featurize data.")

# Initialize featurized data vector with appropriate size
features = np.zeros((batch_data.shape[1],
self.num_features * len(image_column_headers)))

print(features.shape)
# Save csv
full_dataframe, df_features = self._featurize_helper(
features, image_column_headers, save_features, batch_data)
Expand All @@ -336,7 +334,6 @@ def featurize(self, batch_data=np.zeros((1)), image_column_headers='',
self.save_csv(omit_model, omit_depth, omit_output, omit_time)

self.full_dataframe = full_dataframe

return full_dataframe, df_features

def load_and_featurize_data(self,
Expand Down Expand Up @@ -414,7 +411,7 @@ def load_and_featurize_data(self,
# how many images exist in total, to control batch processing.
full_image_dict, df_original = self._full_image_dict_finder(image_path, csv_path,
image_column_headers,
new_csv_name, save_csv)
new_csv_name)
# Save the fixed inputs and full image dict
self.df_original = df_original
self.image_column_headers = image_column_headers
Expand All @@ -435,7 +432,7 @@ def load_and_featurize_data(self,
# If batch processing is turned off, load the images in one big batch and featurize them all
else:
full_data = self.load_data(image_column_headers, image_path, full_image_dict, csv_path,
new_csv_name, grayscale, save_data, save_csv)
new_csv_name, grayscale, save_data)

full_df, features_df = \
self.featurize(full_data, image_column_headers=image_column_headers,
Expand Down Expand Up @@ -521,30 +518,30 @@ def _load_data_helper(self,

def _featurize_helper(self, features, image_column_headers,
save_features, batch_data):
full_dataframe = pd.DataFrame()
# For each image column, perform the full featurization and add the features to the csv

# Initialize the list that collects one features dataframe per image column
features_list = []

# For each image column, perform the full featurization and add the features to the df
for column in range(batch_data.shape[0]):
if not column:
df_prev = self.df_original
else:
df_prev = self.full_dataframe
print(df_prev)
# Featurize the data, and save it to the appropriate columns
partial_features = featurize_data(self.featurizer, batch_data[column])

features[:, self.num_features * column:self.num_features * column + self.num_features]\
= partial_features

# Save the full dataframe
column_dataframe, df_features = \
df_features = \
create_features(batch_data[column],
features,
df_prev,
partial_features,
image_column_headers[column],
self.image_dict[image_column_headers[column]],
continued_column=bool(column),
save_features=save_features)

features_list.append(df_features)

df_features = pd.concat(features_list, axis=1)
full_dataframe = pd.concat([self.df_original, df_features], axis=1)

return full_dataframe, df_features

def _batch_processing(self,
Expand Down Expand Up @@ -585,12 +582,12 @@ def _batch_processing(self,
# Load the images
batch_data = self.load_data(column, image_path,
batch_image_dict, csv_path, new_csv_name,
grayscale, False, False)
grayscale, save_data=False)

# If this is the first batch, the batch features will be saved alone.
# Otherwise, they are concatenated to the last batch
batch_features_list.append(self.featurize(batch_data, column,
True, save_features)[1])
save_features, batch_processing=True)[1])

# Increment index by batch size
index += batch_size
Expand All @@ -605,13 +602,11 @@ def _batch_processing(self,
# Return the full dataframe and features dataframe
return full_df, full_features_df

def _full_image_dict_finder(self, image_path, csv_path, image_column_headers, new_csv_name,
save_csv):
def _full_image_dict_finder(self, image_path, csv_path, image_column_headers, new_csv_name):
full_image_dict = {}

for column in image_column_headers:
list_of_image_paths, df = _image_paths_finder(image_path, csv_path,
column, new_csv_name, save_csv)
column, new_csv_name)

full_image_dict[column] = list_of_image_paths
return full_image_dict, df
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions tests/ImageFeaturizer_testing/csv_checking/resnet50_check_csv

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Loading

0 comments on commit ea8433f

Please sign in to comment.