-
Notifications
You must be signed in to change notification settings - Fork 108
/
build_hdf5_datasets.py
73 lines (50 loc) · 1.61 KB
/
build_hdf5_datasets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python
"""
Build an HDF5 dataset for the train, test or validation split.
Run the script as `python build_hdf5_datasets.py $mode`,
where mode is one of 'train', 'test' or 'val'.
"""
import sys
import numpy as np
import pandas as pd
import tflearn
from tflearn.data_utils import build_hdf5_image_dataset
import pickle
import h5py
# Splits accepted on the command line.
VALID_MODES = ('train', 'test', 'val')


def parse_mode(argv):
    """Return the split name given as the first command-line argument.

    argv is the full argument vector (program name first, as in sys.argv).
    Raises ValueError when the argument is missing or is not one of
    'train', 'test' or 'val'.
    """
    if len(argv) < 2:
        raise ValueError('1 argument needed. Specify if you need to generate a train, test or val set')
    mode = argv[1]
    if mode not in VALID_MODES:
        raise ValueError('Argument not recognized. Has to be train, test or val')
    return mode


def listing_lines(image_ids, labels, mode):
    """Return one '<path> <label>' line per image.

    Each path is built as '<mode>/image_<id>.jpg' and each label is cast to
    int.  Lines are formatted as Python strings instead of going through a
    fixed-width 'S36' byte field and np.savetxt, so long paths are not
    truncated and no b'...' repr ends up in the file on Python 3.

    Raises ValueError when the number of ids and labels differ.
    """
    paths = [mode + '/image_' + str(image_id) + '.jpg' for image_id in image_ids]
    int_labels = np.asarray(labels).astype(int).ravel()
    if len(paths) != int_labels.size:
        raise ValueError('Got %d image ids but %d labels'
                         % (len(paths), int_labels.size))
    # Same "%10s %d" layout the listing file has always used.
    return ["%10s %d" % (path, label) for path, label in zip(paths, int_labels)]


def main(argv=None):
    """Build the HDF5 dataset for one split and re-save it reshaped.

    Steps: read the pickled '<mode>data' / '<mode>labels' objects, write the
    '<mode>datalabels.txt' listing, let tflearn build '<mode>dataset.h5' from
    it, then reload that file, reshape X to (-1, 50, 50, 1) and store X and Y
    in '../data/<mode>.h5'.
    """
    if argv is None:
        argv = sys.argv
    mode = parse_mode(argv)

    # Read data (pickles are assumed to be locally generated, trusted files).
    X = pd.read_pickle(mode + 'data')
    y = pd.read_pickle(mode + 'labels')

    # Write the "<image path> <label>" listing consumed by tflearn.
    dataset_file = mode + 'datalabels.txt'
    with open(dataset_file, 'w') as listing:
        listing.writelines(
            line + '\n' for line in listing_lines(X.index, y.values, mode))

    output = mode + 'dataset.h5'
    build_hdf5_image_dataset(dataset_file, image_shape=(50, 50, 1),
                             mode='file', output_path=output,
                             categorical_labels=True, normalize=True,
                             grayscale=True)

    # Load HDF5 dataset.
    # NOTE(review): the file above is written to the working directory but is
    # read back from '../data/'; this only lines up when the script is run
    # from that directory -- confirm the intended layout.
    with h5py.File('../data/' + mode + 'dataset.h5', 'r') as h5f:
        X_images = h5f['X']
        Y_labels = h5f['Y'][:]
        print(X_images.shape)
        # Slicing copies the images into memory before the file is closed.
        X_images = X_images[:, :, :].reshape([-1, 50, 50, 1])
        print(X_images.shape)

    # The context manager closes the file even if a write fails.
    with h5py.File('../data/' + mode + '.h5', 'w') as h5f:
        h5f.create_dataset('X', data=X_images)
        h5f.create_dataset('Y', data=Y_labels)


if __name__ == '__main__':
    main()