Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix memory leak in image classification demo, which is caused by dataprovider #323

Merged
merged 2 commits into from
Nov 9, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions demo/image_classification/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ plot.png
train.log
image_provider_copy_1.py
*pyc
train.list
test.list
Empty file modified demo/image_classification/data/download_cifar.sh
100644 → 100755
Empty file.
39 changes: 22 additions & 17 deletions demo/image_classification/image_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,24 +58,29 @@ def hook(settings, img_size, mean_img_size, num_classes, color, meta, use_jpeg,
settings.logger.info('DataProvider Initialization finished')


@provider(init_hook=hook)
def processData(settings, file_name):
@provider(init_hook=hook, min_pool_size=0)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

min_pool_size=0 will cause the dataset is not shuffled fully. See here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shuffle file lists in image_provider.py.

def processData(settings, file_list):
"""
The main function for loading data.
Load the batch, iterate all the images and labels in this batch.
file_name: the batch file name.
file_list: the batch file list.
"""
data = cPickle.load(io.open(file_name, 'rb'))
indexes = list(range(len(data['images'])))
if settings.is_train:
random.shuffle(indexes)
for i in indexes:
if settings.use_jpeg == 1:
img = image_util.decode_jpeg(data['images'][i])
else:
img = data['images'][i]
img_feat = image_util.preprocess_img(img, settings.img_mean,
settings.img_size, settings.is_train,
settings.color)
label = data['labels'][i]
yield img_feat.tolist(), int(label)
with open(file_list, 'r') as fdata:
lines = [line.strip() for line in fdata]
random.shuffle(lines)
for file_name in lines:
with io.open(file_name.strip(), 'rb') as file:
data = cPickle.load(file)
indexes = list(range(len(data['images'])))
if settings.is_train:
random.shuffle(indexes)
for i in indexes:
if settings.use_jpeg == 1:
img = image_util.decode_jpeg(data['images'][i])
else:
img = data['images'][i]
img_feat = image_util.preprocess_img(img, settings.img_mean,
settings.img_size, settings.is_train,
settings.color)
label = data['labels'][i]
yield img_feat.astype('float32'), int(label)
2 changes: 2 additions & 0 deletions demo/image_classification/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ def option_parser():
data_creator = ImageClassificationDatasetCreater(data_dir,
processed_image_size,
color)
data_creator.train_list_name = "train.txt"
data_creator.test_list_name = "test.txt"
data_creator.num_per_batch = 1000
data_creator.overwrite = True
data_creator.create_batches()
3 changes: 3 additions & 0 deletions demo/image_classification/preprocess.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,6 @@ set -e
data_dir=./data/cifar-out

python preprocess.py -i $data_dir -s 32 -c 1

echo "data/cifar-out/batches/train.txt" > train.list
echo "data/cifar-out/batches/test.txt" > test.list
4 changes: 2 additions & 2 deletions demo/image_classification/vgg_16_cifar.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
'img_size': 32,'num_classes': 10,
'use_jpeg': 1,'color': "color"}

define_py_data_sources2(train_list=data_dir+"train.list",
test_list=data_dir+'test.list',
define_py_data_sources2(train_list="train.list",
test_list="train.list",
module='image_provider',
obj='processData',
args=args)
Expand Down