From 26413481f90fa0c58c6dc9f8822a5fac6a73d67d Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Wed, 2 Nov 2016 23:53:55 +0800 Subject: [PATCH 1/2] Fix memory leak in image classification demo, which is caused by dataprovider. --- .../data/download_cifar.sh | 0 demo/image_classification/image_provider.py | 36 ++++++++++--------- demo/image_classification/preprocess.sh | 3 ++ demo/image_classification/vgg_16_cifar.py | 4 +-- 4 files changed, 24 insertions(+), 19 deletions(-) mode change 100644 => 100755 demo/image_classification/data/download_cifar.sh diff --git a/demo/image_classification/data/download_cifar.sh b/demo/image_classification/data/download_cifar.sh old mode 100644 new mode 100755 diff --git a/demo/image_classification/image_provider.py b/demo/image_classification/image_provider.py index 9e2f8b8949b39..3cc761c73784f 100644 --- a/demo/image_classification/image_provider.py +++ b/demo/image_classification/image_provider.py @@ -58,24 +58,26 @@ def hook(settings, img_size, mean_img_size, num_classes, color, meta, use_jpeg, settings.logger.info('DataProvider Initialization finished') -@provider(init_hook=hook) -def processData(settings, file_name): +@provider(init_hook=hook, min_pool_size=0) +def processData(settings, file_list): """ The main function for loading data. Load the batch, iterate all the images and labels in this batch. - file_name: the batch file name. + file_list: the batch file list. 
""" - data = cPickle.load(io.open(file_name, 'rb')) - indexes = list(range(len(data['images']))) - if settings.is_train: - random.shuffle(indexes) - for i in indexes: - if settings.use_jpeg == 1: - img = image_util.decode_jpeg(data['images'][i]) - else: - img = data['images'][i] - img_feat = image_util.preprocess_img(img, settings.img_mean, - settings.img_size, settings.is_train, - settings.color) - label = data['labels'][i] - yield img_feat.tolist(), int(label) + with open(file_list, 'r') as fdata: + for file_name in fdata: + data = cPickle.load(io.open(file_name.strip(), 'rb')) + indexes = list(range(len(data['images']))) + if settings.is_train: + random.shuffle(indexes) + for i in indexes: + if settings.use_jpeg == 1: + img = image_util.decode_jpeg(data['images'][i]) + else: + img = data['images'][i] + img_feat = image_util.preprocess_img(img, settings.img_mean, + settings.img_size, settings.is_train, + settings.color) + label = data['labels'][i] + yield img_feat.astype('float32'), int(label) diff --git a/demo/image_classification/preprocess.sh b/demo/image_classification/preprocess.sh index dfe3eb95d1ab8..3efdcc9a49154 100755 --- a/demo/image_classification/preprocess.sh +++ b/demo/image_classification/preprocess.sh @@ -17,3 +17,6 @@ set -e data_dir=./data/cifar-out python preprocess.py -i $data_dir -s 32 -c 1 + +echo "data/cifar-out/batches/train.list" > trn.list +echo "data/cifar-out/batches/test.list" > tst.list diff --git a/demo/image_classification/vgg_16_cifar.py b/demo/image_classification/vgg_16_cifar.py index e8b8af4bd313d..ae6a48a3e357a 100755 --- a/demo/image_classification/vgg_16_cifar.py +++ b/demo/image_classification/vgg_16_cifar.py @@ -25,8 +25,8 @@ 'img_size': 32,'num_classes': 10, 'use_jpeg': 1,'color': "color"} - define_py_data_sources2(train_list=data_dir+"train.list", - test_list=data_dir+'test.list', + define_py_data_sources2(train_list="trn.list", + test_list="tst.list", module='image_provider', obj='processData', args=args) From 
6d187f9e2d31ae1a93ec30f252fcf43b28ecdf67 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Tue, 8 Nov 2016 17:52:26 +0800 Subject: [PATCH 2/2] follow comments --- demo/image_classification/.gitignore | 2 ++ demo/image_classification/image_provider.py | 33 +++++++++++---------- demo/image_classification/preprocess.py | 2 ++ demo/image_classification/preprocess.sh | 4 +-- demo/image_classification/vgg_16_cifar.py | 4 +-- 5 files changed, 26 insertions(+), 19 deletions(-) diff --git a/demo/image_classification/.gitignore b/demo/image_classification/.gitignore index 76961dd1436f8..6a05b8f6632db 100644 --- a/demo/image_classification/.gitignore +++ b/demo/image_classification/.gitignore @@ -5,3 +5,5 @@ plot.png train.log image_provider_copy_1.py *pyc +train.list +test.list diff --git a/demo/image_classification/image_provider.py b/demo/image_classification/image_provider.py index 3cc761c73784f..305efbcdc6bb1 100644 --- a/demo/image_classification/image_provider.py +++ b/demo/image_classification/image_provider.py @@ -66,18 +66,21 @@ def processData(settings, file_list): file_list: the batch file list. 
""" with open(file_list, 'r') as fdata: - for file_name in fdata: - data = cPickle.load(io.open(file_name.strip(), 'rb')) - indexes = list(range(len(data['images']))) - if settings.is_train: - random.shuffle(indexes) - for i in indexes: - if settings.use_jpeg == 1: - img = image_util.decode_jpeg(data['images'][i]) - else: - img = data['images'][i] - img_feat = image_util.preprocess_img(img, settings.img_mean, - settings.img_size, settings.is_train, - settings.color) - label = data['labels'][i] - yield img_feat.astype('float32'), int(label) + lines = [line.strip() for line in fdata] + random.shuffle(lines) + for file_name in lines: + with io.open(file_name.strip(), 'rb') as file: + data = cPickle.load(file) + indexes = list(range(len(data['images']))) + if settings.is_train: + random.shuffle(indexes) + for i in indexes: + if settings.use_jpeg == 1: + img = image_util.decode_jpeg(data['images'][i]) + else: + img = data['images'][i] + img_feat = image_util.preprocess_img(img, settings.img_mean, + settings.img_size, settings.is_train, + settings.color) + label = data['labels'][i] + yield img_feat.astype('float32'), int(label) diff --git a/demo/image_classification/preprocess.py b/demo/image_classification/preprocess.py index 0286a5d7e9dc8..fe7ea19bf0277 100755 --- a/demo/image_classification/preprocess.py +++ b/demo/image_classification/preprocess.py @@ -35,6 +35,8 @@ def option_parser(): data_creator = ImageClassificationDatasetCreater(data_dir, processed_image_size, color) + data_creator.train_list_name = "train.txt" + data_creator.test_list_name = "test.txt" data_creator.num_per_batch = 1000 data_creator.overwrite = True data_creator.create_batches() diff --git a/demo/image_classification/preprocess.sh b/demo/image_classification/preprocess.sh index 3efdcc9a49154..e3e86ff10675c 100755 --- a/demo/image_classification/preprocess.sh +++ b/demo/image_classification/preprocess.sh @@ -18,5 +18,5 @@ data_dir=./data/cifar-out python preprocess.py -i $data_dir -s 32 -c 1 
-echo "data/cifar-out/batches/train.list" > trn.list -echo "data/cifar-out/batches/test.list" > tst.list +echo "data/cifar-out/batches/train.txt" > train.list +echo "data/cifar-out/batches/test.txt" > test.list diff --git a/demo/image_classification/vgg_16_cifar.py b/demo/image_classification/vgg_16_cifar.py index ae6a48a3e357a..edd6988c48acd 100755 --- a/demo/image_classification/vgg_16_cifar.py +++ b/demo/image_classification/vgg_16_cifar.py @@ -25,8 +25,8 @@ 'img_size': 32,'num_classes': 10, 'use_jpeg': 1,'color': "color"} - define_py_data_sources2(train_list="trn.list", - test_list="tst.list", + define_py_data_sources2(train_list="train.list", + test_list="test.list", module='image_provider', obj='processData', args=args)