-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmerge_data.py
83 lines (62 loc) · 2.87 KB
/
merge_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
"""Merges the data and labels .npy files from all participants into one file
This script merges the data of all the participants into one big .npy file
(and the labels into a second one) that can then be fed to a neural network.
...
Parameters:
    Data (str):
        Directory containing all the different .npy files of the participants
    output (str, optional):
        Output directory. Same as Data if not specified
"""
import argparse
import os
import numpy as np
# Prefix given to the merged output files. Files carrying this prefix are
# skipped on input so that re-running the script with the output directory
# equal to the data directory does not merge earlier results back in.
OUTPUT_PREFIX = 'all_'


def build_parser():
    """Create the command line parser of the merge script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('Data',
                        help='Directory containing all the processed .npy '
                             'files')
    parser.add_argument('--output', '-o',
                        help='Output directory for the combined .npy files. '
                             'Same as Data if not specified')
    return parser


def find_participant_files(data_dir):
    """Return the sorted ``(data_file, labels_file)`` pairs in ``data_dir``.

    A file counts as a data file when its name contains ``'data'`` and as a
    labels file when its name contains ``'label'``. Both lists are sorted and
    paired by position.

    Parameters:
        data_dir (str):
            Directory that is scanned for participant files

    Returns:
        list of (str, str): file names (not full paths) of each pair

    Raises:
        ValueError: if no data file is found, if the number of data and
            labels files differs, or if the two files of a pair do not start
            with the same three characters
    """
    candidates = [name for name in sorted(os.listdir(data_dir))
                  if not name.startswith(OUTPUT_PREFIX)]
    data_files = [name for name in candidates if 'data' in name]
    label_files = [name for name in candidates if 'label' in name]

    if not data_files:
        raise ValueError('no data files found in {}'.format(data_dir))
    if len(data_files) != len(label_files):
        raise ValueError('found {} data files but {} labels files'.format(
            len(data_files), len(label_files)))

    pairs = list(zip(data_files, label_files))
    for data_file, labels_file in pairs:
        # check that the 2 files belong to the same participant by comparing
        # the first 3 letters namely pXX
        if data_file[:3] != labels_file[:3]:
            raise ValueError('files {} and {} do not belong to the same '
                             'participant'.format(data_file, labels_file))
    return pairs


def merge_participant_files(data_dir, out_dir):
    """Concatenate all participant files of ``data_dir`` into ``out_dir``.

    The arrays are concatenated along the first axis in sorted file order and
    keep the dtype of the input files.

    Parameters:
        data_dir (str):
            Directory containing the participant .npy files
        out_dir (str):
            Existing directory the two merged .npy files are written to

    Returns:
        (str, str): paths of the merged data file and the merged labels file

    Raises:
        ValueError: see ``find_participant_files``
    """
    pairs = find_participant_files(data_dir)

    print('')
    print('Data directory: {}'.format(data_dir))
    print('Output directory: {}'.format(out_dir))
    print('Number of participant data found: {}'.format(len(pairs)))
    print('')

    # basic name pattern for the output file: strips the first 4 and the last
    # 9 characters of the first data file name (presumably a 'pXX_' prefix
    # and a '_data.npy' suffix - confirm against the real file names)
    outfile_name = '{}{}'.format(OUTPUT_PREFIX, pairs[0][0][4:-9])

    print('Merging files...')
    # Collect every array first and concatenate once: this avoids recopying
    # the growing result for each participant and needs no float64
    # placeholder row that would silently change the dtype of the output.
    data_parts = []
    label_parts = []
    for data_file, labels_file in pairs:
        print(' ({}, {})'.format(data_file, labels_file))
        data_parts.append(np.load(os.path.join(data_dir, data_file)))
        label_parts.append(np.load(os.path.join(data_dir, labels_file)))
    all_data = np.concatenate(data_parts, axis=0)
    all_labels = np.concatenate(label_parts, axis=0)

    print('')
    print('Writing data to disc...')
    data_path = os.path.join(out_dir, '{}_data.npy'.format(outfile_name))
    labels_path = os.path.join(out_dir, '{}_labels.npy'.format(outfile_name))
    np.save(data_path, all_data)
    np.save(labels_path, all_labels)
    print(" {}".format(data_path))
    print(" {}".format(labels_path))
    return data_path, labels_path


def main(argv=None):
    """Parse the command line and merge the participant files.

    Parameters:
        argv (list of str, optional):
            Command line arguments without the program name. The arguments
            of the running process are used if not specified

    Invalid directories or inconsistent input files terminate the script
    through ``parser.error`` (usage message and exit status 2).
    """
    parser = build_parser()
    arguments = parser.parse_args(argv)

    data_dir = os.path.abspath(arguments.Data)
    out_dir = os.path.abspath(arguments.output) if arguments.output \
        else data_dir

    # explicit checks instead of assert, which is stripped under python -O
    if not os.path.isdir(data_dir):
        parser.error('Data directory does not exist: {}'.format(data_dir))
    if not os.path.isdir(out_dir):
        parser.error('Output directory does not exist: {}'.format(out_dir))

    try:
        merge_participant_files(data_dir, out_dir)
    except ValueError as error:
        parser.error(str(error))


if __name__ == '__main__':
    main()