-
Notifications
You must be signed in to change notification settings - Fork 0
/
split.py
64 lines (56 loc) · 2.43 KB
/
split.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# *_*coding: utf-8 *_*
# author --liming--
"""
读取images.txt文件,获得每个图像的标签
读取train_test_split.txt文件,获取每个图像的train, test标签.其中1为训练,0为测试.
"""
import os
import shutil
import numpy as np
import config
import time
time_start = time.time()

# File paths. Everything is built by plain concatenation onto config.path,
# so config.path presumably ends with a path separator -- TODO confirm.
path_images = config.path + 'images.txt'
path_split = config.path + 'train_test_split.txt'
# NOTE: "trian" is a typo for "train"; the name is kept because the rest of
# the script refers to it.
trian_save_path = config.path + 'dataset/train/'
test_save_path = config.path + 'dataset/test/'


def _read_rows(path):
    """Return one comma-split row (a list of str) per non-blank line of *path*.

    Only the trailing newline is removed from each line; every other
    character is preserved so that later code sees the raw text in row[0].
    """
    rows = []
    with open(path, 'r') as handle:
        for raw_line in handle:
            text = raw_line.strip('\n')
            if not text.strip():
                # A blank line (e.g. an empty line at the end of the file)
                # would otherwise become [''] and make the later
                # "<id> <value>" field lookup fail with IndexError.
                continue
            rows.append(text.split(','))
    return rows


# Rows of images.txt
images = _read_rows(path_images)
# Rows of train_test_split.txt
split = _read_rows(path_split)
# Split: copy every image into the train or test tree, one sub-folder per
# class folder. Rows of `images` and `split` are paired by position.
num = len(images)  # total number of images
for k in range(num):
    image_line = images[k][0]
    if not image_line.strip():
        # Blank row (e.g. produced by an empty trailing line): nothing to copy.
        continue

    # Second whitespace-separated field is "<class folder>/<image file>".
    rel_path = image_line.split()[1]
    path_parts = rel_path.split('/')
    class_dir = path_parts[0]
    image_name = path_parts[1]

    # The flag is the last field of the split row. Parsing the last token
    # (instead of the last character) tolerates trailing whitespace.
    is_train = int(split[k][0].split()[-1]) == 1
    if is_train:
        target_root = trian_save_path  # flag 1 -> training set
    else:
        target_root = test_save_path  # anything else -> test set

    # Create the class folder on first use, then copy the image into it.
    target_dir = target_root + class_dir
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    shutil.copy(config.path + 'images/' + rel_path,
                target_dir + '/' + image_name)
    print('%s处理完毕!' % image_name)

time_end = time.time()
print('CUB200训练集和测试集划分完毕, 耗时%s!!' % (time_end - time_start))