forked from ruotianluo/self-critical.pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
profeat_retina_resize.py
115 lines (95 loc) · 4.48 KB
/
profeat_retina_resize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
"""
Preprocess a raw json dataset into features files for use in data_loader.py
Input: json file that has the form
{ images: [{ file_path: 'path/img.jpg', imgid: 0, captions: ['a caption', ...] }, ...] }
example element in this list would look like
{'captions': [u'A man with a red helmet on a small moped on a dirt road. ', u'Man riding a motor bike on a dirt road on the countryside.', u'A man riding on the back of a motorcycle.', u'A dirt path with a young person on a motor bike rests to the foreground of a verdant area with a bridge and a background of cloud-wreathed mountains. ', u'A man in a red shirt and a red hat is on a motorcycle on a hill side.'], 'file_path': u'val2014/COCO_val2014_000000391895.jpg', 'id': 391895}
This script reads this json, loads every listed image, runs it through a
pretrained ResNet, and saves the extracted features to disk, one file per image.
Output: two folders of features, <output_dir>_fc and <output_dir>_att
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import json
import argparse
from random import shuffle, seed
import string
# non-standard dependencies:
import h5py
from six.moves import cPickle
import numpy as np
import torch
import torchvision.models as models
import skimage.io
import matplotlib.pyplot as plt
from torchvision import transforms as trn
# Per-channel normalisation applied to every image tensor before it is fed to
# the network. ToTensor is deliberately not part of the pipeline: main()
# already hands over a channel-first float tensor scaled by 1/255.
# NOTE(review): the constants look like the usual ImageNet mean/std -- confirm.
preprocess = trn.Compose([
    trn.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
from captioning.utils.resnet_utils import myResnet
import captioning.utils.resnet as resnet
# Device used for the model and the image tensors: CUDA when available,
# otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def _as_rgb(image):
    """Return `image` as an (H, W, 3) array.

    A 2-D (grayscale) array, or an array with fewer than three channels, has
    its first channel replicated three times. Channels beyond the third
    (e.g. an alpha channel) are dropped.
    """
    if image.ndim == 2:
        image = image[:, :, np.newaxis]
    if image.shape[2] < 3:
        return np.repeat(image[:, :, :1], 3, axis=2)
    return image[:, :, :3]


def main(params):
    """Extract ResNet features for every image listed in the input json.

    Args:
        params: dict of options (normally ``vars(args)`` from the CLI) with
            the keys
            ``model``       -- name of the constructor looked up in ``resnet``,
            ``model_root``  -- directory holding ``<model>.pth``,
            ``input_json``  -- json file whose ``images`` entry is a list of
                               dicts with ``file_path`` and ``imgid``,
            ``output_dir``  -- path prefix for the two output directories,
            ``att_size``    -- passed straight through to the wrapped network,
            ``images_root`` -- optional directory prepended to each
                               ``file_path`` (absolute paths are unaffected).

    Side effects:
        Creates ``<output_dir>_fc`` and ``<output_dir>_att`` when missing and
        writes ``<imgid>.npy`` (first network output) and ``<imgid>.npz`` with
        the array stored under ``feat`` (second network output) for each image.
    """
    print(device)

    net = getattr(resnet, params['model'])()
    weights_path = os.path.join(params['model_root'], params['model'] + '.pth')
    # Load onto the CPU first so a checkpoint saved on a GPU also works on a
    # CPU-only host; the whole model is moved to `device` right below.
    net.load_state_dict(torch.load(weights_path, map_location='cpu'))
    my_resnet = myResnet(net)
    my_resnet.to(device)
    my_resnet.eval()

    # Context manager so the json file handle is closed deterministically.
    with open(params['input_json'], 'r') as json_file:
        imgs = json.load(json_file)['images']
    N = len(imgs)

    seed(123)  # make reproducible

    dir_fc = params['output_dir'] + '_fc'
    dir_att = params['output_dir'] + '_att'
    for out_dir in (dir_fc, dir_att):
        if not os.path.isdir(out_dir):
            # makedirs (not mkdir) so a missing parent directory is created too.
            os.makedirs(out_dir)

    # An empty root leaves file_path untouched, and os.path.join ignores the
    # root for absolute paths, so existing json files keep working.
    images_root = params.get('images_root') or ''

    for i, img in enumerate(imgs):
        print('The image path is', img['file_path'])

        # load the image
        I = skimage.io.imread(os.path.join(images_root, img['file_path']))
        # Normalise the channel layout before anything indexes the third
        # axis: grayscale images are 2-D and would otherwise raise IndexError.
        I = _as_rgb(I)

        # NOTE(review): dividing by 255 assumes 8-bit pixel values -- confirm
        # that no 16-bit images are present in the dataset.
        I = I.astype('float32') / 255.0
        I = torch.from_numpy(I.transpose([2, 0, 1])).to(device)
        I = preprocess(I)
        with torch.no_grad():
            tmp_fc, tmp_att = my_resnet(I, params['att_size'])

        # One file per image, named after its imgid.
        np.save(os.path.join(dir_fc, str(img['imgid'])),
                tmp_fc.data.cpu().float().numpy())
        np.savez_compressed(os.path.join(dir_att, str(img['imgid'])),
                            feat=tmp_att.data.cpu().float().numpy())

        if i % 10 == 0:
            print('processing %d/%d (%.2f%% done)' % (i, N, i*100.0/N))
    print('wrote ', params['output_dir'])
if __name__ == "__main__":
    # Command-line entry point: parse the options, echo them, and hand them to
    # main() as a plain dict.
    parser = argparse.ArgumentParser()

    # input json
    parser.add_argument('--input_json', default='data/findings.json',
                        help='input json file listing the images to extract features from')
    # The script writes per-image feature files into two directories derived
    # from this prefix, not a single h5 file.
    parser.add_argument('--output_dir', default='data/findings',
                        help='output path prefix; features are written to <output_dir>_fc and <output_dir>_att')

    # options
    parser.add_argument('--images_root', default='', help='root location in which images are stored, to be prepended to file_path in input json')
    parser.add_argument('--att_size', default=14, type=int, help='14x14 or 7x7')
    parser.add_argument('--model', default='resnet101', type=str, help='resnet101, resnet152')
    parser.add_argument('--model_root', default='data/imagenet_weights', type=str, help='model root')

    args = parser.parse_args()
    params = vars(args)  # convert to ordinary dict
    print('parsed input parameters:')
    print(json.dumps(params, indent = 2))
    main(params)