-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathunet_kfold_prepare.py
114 lines (93 loc) · 3.39 KB
/
unet_kfold_prepare.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
###############################################################################
# unet_kfold_prepare.py
#
#
# This is an auxiliary tool to prepare a directory hierarchy populated with
# raw and binary training images for use in doing a "manual" k-fold cross
# validation of Keras/TensorFlow models.
#
# Usage:
# python unet_kfold_prepare.py
#
################################################################################
import os, sys
from sklearn.model_selection import KFold
import numpy as np
import shutil
from shutil import copyfile
# Configuration
kfold_splits = 5                             # number of folds (k)
raw_source = "./runs/unet/padded_raw8"       # directory holding the raw images
bin_source = "./runs/unet/padded_binary"     # directory holding the binary images
kfold_directory = "./runs/unet/kfold"        # root of the generated hierarchy
filenames_file = "./runs/training_5000.txt"  # listing read one filename per line

# prep the destination directories
#
# For every fold i the following layout is (re)created:
#   <kfold_directory>/<i>/train/padded_raw8
#   <kfold_directory>/<i>/train/padded_binary
#   <kfold_directory>/<i>/test/padded_raw8
#   <kfold_directory>/<i>/test/padded_binary
for i in range(kfold_splits):
    path = os.path.join(kfold_directory, str(i))
    # Wipe any fold directory left over from a previous run so stale files
    # cannot end up mixed into the new split.
    if os.path.isdir(path):
        shutil.rmtree(path)
    for subset in ("train", "test"):
        for kind in ("padded_raw8", "padded_binary"):
            # makedirs (rather than mkdir) also creates kfold_directory and
            # the fold directory itself when they do not exist yet.
            os.makedirs(os.path.join(path, subset, kind))
# get the list of filenames in our training set
#
# Each non-blank line of the listing is taken as one filename. Blank lines
# are skipped so they do not turn into empty filenames, and the file is
# opened in a `with` block so the handle is closed as soon as it is read.
with open(filenames_file) as listing:
    train_filenames = np.asarray(
        [line.rstrip('\n') for line in listing if line.strip()]
    )

# Sanity-check output: how many names were read and the container type.
print(len(train_filenames))
print(type(train_filenames))

# Print one sample entry, but only when the listing is long enough to have
# it; indexing unconditionally would raise IndexError on a short listing.
sample_index = 200
if len(train_filenames) > sample_index:
    print(train_filenames[sample_index])
# do the k-fold split on the list of filenames
# NOTE: shuffle=True is used without a random_state, so each run assigns
# files to folds differently.
kf = KFold(n_splits=kfold_splits, shuffle=True)
print(kf)


def _copy_subset(filenames, fold, subset, label):
    """Copy the raw and binary image of every listed file into one subset.

    filenames -- iterable of image filenames, relative to the source dirs
    fold      -- zero-based fold number; selects <kfold_directory>/<fold>
    subset    -- destination sub-directory name ("train" or "test")
    label     -- tag shown in the progress message ("TRAIN" or "TEST")
    """
    # (source directory, destination sub-directory) pairs; raw is copied
    # before binary for each file.
    image_kinds = (
        (raw_source, "padded_raw8"),
        (bin_source, "padded_binary"),
    )
    for cnt, name in enumerate(filenames):
        print("FOLD: %d, %s COUNT: %d" % (fold, label, cnt))
        for source_dir, kind in image_kinds:
            src_file = os.path.join(source_dir, name)
            dst_file = os.path.join(kfold_directory, str(fold), subset,
                                    kind, name)
            copyfile(src_file, dst_file)


# copy the right files into the right places in our k-fold cross-validation
# folder hierarchy
for fold, (train_index, test_index) in enumerate(kf.split(train_filenames)):
    print("TRAIN:", train_index, "TEST:", test_index)
    # train_filenames is a numpy array, so the index arrays returned by
    # kf.split() select the matching filenames directly.
    _copy_subset(train_filenames[train_index], fold, "train", "TRAIN")
    _copy_subset(train_filenames[test_index], fold, "test", "TEST")