diff --git a/README.md b/README.md
index 6ee238fae..dc69a591d 100644
--- a/README.md
+++ b/README.md
@@ -1,11 +1,20 @@
 # *Faster* R-CNN Fork
+ - This fork adds support for Python 3.4
+ - Uses an up-to-date Caffe version
+
+ #### Warning: this fork is still a work in progress
-This fork :
 - Merged the [caffe-fast-rcnn](https://github.com/rbgirshick/caffe-fast-rcnn/tree/0dcd397b29507b8314e252e850518c5695efbb83) fork to the current Caffe version. The [PR](https://github.com/BVLC/caffe/pull/4163) is waiting to be Merged
 - Add support to Python 3.4
+ - Not tested with Python 2.7
+
+##### To-Do
+
+ - Detect GPU arch in setup.py
+ - Test with Python 2.7
 
 ### Disclaimer
 
@@ -97,9 +106,31 @@ If you find Faster R-CNN useful in your research, please consider citing:
 **Note 2:** The `caffe` submodule needs to be on the `fast-rcnn` branch (or equivalent detached state). This will happen automatically *if you followed step 1 instructions*.
 
 3. Build the Cython modules
+
+    Install the required pip packages
+    ```sh
+    pip install -r requirements.txt
+
+    ```
+
+    Check your GPU arch first; the default is sm_35
+    ```Shell
     cd $FRCN_ROOT/lib
-    make
+
+    # For Python 2
+    make python2
+
+    # For Python 3
+    make python3
+    ```
+
+    To rebuild:
+    ```sh
+    make clean
+
+    # Then
+    make python2  # or make python3
     ```
 
 4. Build Caffe and pycaffe
diff --git a/caffe b/caffe
index c5f996b29..ad63d75ff 160000
--- a/caffe
+++ b/caffe
@@ -1 +1 @@
-Subproject commit c5f996b290bfe45a78fd662cfa24a61e89f58fd3
+Subproject commit ad63d75ff859fa9f2ad5537e52b07358674c49b0
diff --git a/lib/Makefile b/lib/Makefile
index a48239850..d598072ad 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -1,3 +1,12 @@
-all:
+python3:
+	python3 setup.py build_ext --inplace
+	rm -rf build
+
+python2:
 	python setup.py build_ext --inplace
 	rm -rf build
+
+clean:
+	rm -f utils/bbox.c
+	rm -f nms/cpu_nms.c
+	rm -f nms/gpu_nms.cpp
\ No newline at end of file
diff --git a/lib/datasets/coco.py b/lib/datasets/coco.py
index 0815c696b..fdfb613f6 100644
--- a/lib/datasets/coco.py
+++ b/lib/datasets/coco.py
@@ -13,7 +13,8 @@
 import numpy as np
 import scipy.sparse
 import scipy.io as sio
-import cPickle
+import six.moves.cPickle as pickle
+from six.moves import range
 import json
 import uuid
 # COCO API
@@ -36,7 +37,7 @@ def _filter_crowd_proposals(roidb, crowd_thresh):
         if len(crowd_inds) == 0 or len(non_gt_inds) == 0:
             continue
 
-        iscrowd = [int(True) for _ in xrange(len(crowd_inds))]
+        iscrowd = [int(True) for _ in range(len(crowd_inds))]
         crowd_boxes = ds_utils.xyxy_to_xywh(entry['boxes'][crowd_inds, :])
         non_gt_boxes = ds_utils.xyxy_to_xywh(entry['boxes'][non_gt_inds, :])
         ious = COCOmask.iou(non_gt_boxes, crowd_boxes, iscrowd)
@@ -65,7 +66,7 @@ def __init__(self, image_set, year):
         self._COCO = COCO(self._get_ann_file())
         cats = self._COCO.loadCats(self._COCO.getCatIds())
         self._classes = tuple(['__background__'] + [c['name'] for c in cats])
-        self._class_to_ind = dict(zip(self.classes, xrange(self.num_classes)))
+        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
         self._class_to_coco_cat_id = dict(zip([c['name'] for c in cats],
                                               self._COCO.getCatIds()))
         self._image_index = self._load_image_set_index()
@@ -126,10 +127,8 @@ def image_path_from_index(self, index):
                     str(index).zfill(12) + '.jpg')
         image_path = osp.join(self._data_path, 'images',
                               self._data_name, file_name)
-        assert(
-            osp.exists(image_path),
-            'Path does not exist: {}'.format(image_path)
-        )
+
+        assert osp.exists(image_path), 'Path does not exist: {}'.format(image_path)
         return image_path
 
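**Note:** the assert rewrites above (and throughout this patch) fix a classic pitfall: wrapping an assert's condition and message in one set of parentheses builds a two-element tuple, and a non-empty tuple is always truthy, so the check can never fire. A minimal illustration, not taken from the repo:

```python
# Broken form (as removed above): the parentheses make a 2-tuple,
# and any non-empty tuple is truthy, so the assert never fires.
assert (1 == 2, 'never raised')  # silently passes

# Corrected form (as added above): condition first, message second.
try:
    assert 1 == 2, 'raised as expected'
except AssertionError as e:
    print(e)  # -> raised as expected
```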
@@ -153,7 +152,7 @@ def _roidb_from_proposals(self, method):
         if osp.exists(cache_file):
             with open(cache_file, 'rb') as fid:
-                roidb = cPickle.load(fid)
+                roidb = pickle.load(fid)
 
             print('{:s} {:s} roidb loaded from {:s}'.format(
                 self.name,
@@ -174,7 +173,7 @@ def _roidb_from_proposals(self, method):
         else:
             roidb = self._load_proposals(method, None)
             with open(cache_file, 'wb') as fid:
-                cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
+                pickle.dump(roidb, fid, pickle.HIGHEST_PROTOCOL)
 
         print('wrote {:s} roidb to {:s}'.format(method, cache_file))
 
@@ -196,8 +195,10 @@ def _load_proposals(self, method, gt_roidb):
             'MCG',
             'selective_search',
             'edge_boxes_AR',
-            'edge_boxes_70']
-        assert(method in valid_methods)
+            'edge_boxes_70'
+        ]
+
+        assert method in valid_methods
 
         print('Loading {} boxes'.format(method))
 
@@ -239,7 +240,7 @@ def gt_roidb(self):
         cache_file = osp.join(self.cache_path, self.name + '_gt_roidb.pkl')
         if osp.exists(cache_file):
             with open(cache_file, 'rb') as fid:
-                roidb = cPickle.load(fid)
+                roidb = pickle.load(fid)
 
             print('{} gt roidb loaded from {}'.format(self.name, cache_file))
 
@@ -249,7 +250,7 @@ def gt_roidb(self):
                     for index in self._image_index]
 
         with open(cache_file, 'wb') as fid:
-            cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL)
+            pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL)
 
         print('wrote gt roidb to {}'.format(cache_file))
 
@@ -329,7 +330,7 @@ def _get_thr_ind(coco_eval, thr):
     ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) &
                    (coco_eval.params.iouThrs < thr + 1e-5))[0][0]
     iou_thr = coco_eval.params.iouThrs[ind]
-    assert(np.isclose(iou_thr, thr))
+    assert np.isclose(iou_thr, thr)
     return ind
 
@@ -373,7 +374,7 @@ def _do_detection_eval(self, res_file, output_dir):
         eval_file = osp.join(output_dir, 'detection_results.pkl')
 
         with open(eval_file, 'wb') as fid:
-            cPickle.dump(coco_eval, fid, cPickle.HIGHEST_PROTOCOL)
+            pickle.dump(coco_eval, fid, pickle.HIGHEST_PROTOCOL)
 
         print('Wrote COCO eval results to: {}'.format(eval_file))
 
@@ -392,7 +393,7 @@ def _coco_results_one_category(self, boxes, cat_id):
                 [{'image_id': index,
                   'category_id': cat_id,
                   'bbox': [xs[k], ys[k], ws[k], hs[k]],
-                  'score': scores[k]} for k in xrange(dets.shape[0])])
+                  'score': scores[k]} for k in range(dets.shape[0])])
         return results
 
diff --git a/lib/datasets/imdb.py b/lib/datasets/imdb.py
index 0f3acae41..b0b5b3df7 100644
--- a/lib/datasets/imdb.py
+++ b/lib/datasets/imdb.py
@@ -11,6 +11,7 @@
 import numpy as np
 import scipy.sparse
 from fast_rcnn.config import cfg
+from six.moves import range
 
 
 class imdb(object):
@@ -98,21 +99,21 @@ def evaluate_detections(self, all_boxes, output_dir=None):
     def _get_widths(self):
         return [
             PIL.Image.open(self.image_path_at(i)).size[0]
-            for i in xrange(self.num_images)
+            for i in range(self.num_images)
         ]
 
     def append_flipped_images(self):
         num_images = self.num_images
         widths = self._get_widths()
 
-        for i in xrange(num_images):
+        for i in range(num_images):
             boxes = self.roidb[i]['boxes'].copy()
             oldx1 = boxes[:, 0].copy()
             oldx2 = boxes[:, 2].copy()
             boxes[:, 0] = widths[i] - oldx2 - 1
             boxes[:, 2] = widths[i] - oldx1 - 1
 
-            assert((boxes[:, 2] >= boxes[:, 0]).all())
+            assert (boxes[:, 2] >= boxes[:, 0]).all()
 
             entry = {'boxes': boxes,
                      'gt_overlaps': self.roidb[i]['gt_overlaps'],
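**Note:** context for the `cPickle` → `six.moves.cPickle` changes above: on Python 3 the module resolves to plain `pickle`, files must be opened in binary mode, and `pickle.HIGHEST_PROTOCOL` differs between interpreters (2 on Python 2, higher on Python 3), so caches written by one interpreter are not guaranteed readable by the other. A sketch of the cache pattern used here, with a hypothetical `cache_file` path:

```python
import os.path as osp
import six.moves.cPickle as pickle  # cPickle on Py2, pickle on Py3

cache_file = '/tmp/example_gt_roidb.pkl'  # hypothetical path

def load_or_build(build_fn):
    # Binary mode ('rb'/'wb') is required for pickle on Python 3.
    if osp.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            return pickle.load(fid)
    data = build_fn()
    with open(cache_file, 'wb') as fid:
        # HIGHEST_PROTOCOL is interpreter-specific; pin protocol=2 instead
        # if Python 2 must be able to read these caches.
        pickle.dump(data, fid, pickle.HIGHEST_PROTOCOL)
    return data

roidb = load_or_build(lambda: [{'boxes': []}])
```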
@@ -152,13 +153,13 @@ def evaluate_recall(self, candidate_boxes=None, thresholds=None,
             [512**2, 1e5**2]  # 512-inf
         ]
 
-        assert(area in areas, 'unknown area range: {}'.format(area))
+        assert area in areas, 'unknown area range: {}'.format(area)
 
         area_range = area_ranges[areas[area]]
         gt_overlaps = np.zeros(0)
         num_pos = 0
 
-        for i in xrange(self.num_images):
+        for i in range(self.num_images):
             # Checking for max_overlaps == 1 avoids including crowd annotations
             # (...pretty hacking :/)
             max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1)
@@ -189,7 +190,7 @@ def evaluate_recall(self, candidate_boxes=None, thresholds=None,
                                     gt_boxes.astype(np.float))
 
             _gt_overlaps = np.zeros((gt_boxes.shape[0]))
-            for j in xrange(gt_boxes.shape[0]):
+            for j in range(gt_boxes.shape[0]):
                 # find which proposal box maximally covers each gt box
                 argmax_overlaps = overlaps.argmax(axis=0)
                 # and get the iou amount of coverage for each gt box
@@ -223,13 +224,11 @@ def evaluate_recall(self, candidate_boxes=None, thresholds=None,
                 'gt_overlaps': gt_overlaps}
 
     def create_roidb_from_box_list(self, box_list, gt_roidb):
-        assert(
-            len(box_list) == self.num_images,
-            'Number of boxes must match number of ground-truth images'
-        )
+
+        assert len(box_list) == self.num_images, 'Number of boxes must match number of ground-truth images'
         roidb = []
 
-        for i in xrange(self.num_images):
+        for i in range(self.num_images):
             boxes = box_list[i]
             num_boxes = boxes.shape[0]
             overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)
@@ -257,7 +256,7 @@ def create_roidb_from_box_list(self, box_list, gt_roidb):
     @staticmethod
     def merge_roidbs(a, b):
         assert len(a) == len(b)
-        for i in xrange(len(a)):
+        for i in range(len(a)):
             a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes']))
             a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'],
                                             b[i]['gt_classes']))
diff --git a/lib/datasets/pascal_voc.py b/lib/datasets/pascal_voc.py
index bfd7faa7c..f49aad0c5 100644
--- a/lib/datasets/pascal_voc.py
+++ b/lib/datasets/pascal_voc.py
@@ -12,11 +12,12 @@
 import scipy.sparse
 import scipy.io as sio
 import utils.cython_bbox
-import cPickle
+import six.moves.cPickle as pickle
 import subprocess
 import uuid
 from voc_eval import voc_eval
 from fast_rcnn.config import cfg
+from six.moves import range
 
 
 class pascal_voc(imdb):
@@ -37,7 +38,7 @@ def __init__(self, image_set, year, devkit_path=None):
                          'cow', 'diningtable', 'dog', 'horse',
                          'motorbike', 'person', 'pottedplant',
                          'sheep', 'sofa', 'train', 'tvmonitor')
-        self._class_to_ind = dict(zip(self.classes, xrange(self.num_classes)))
+        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
         self._image_ext = '.jpg'
         self._image_index = self._load_image_set_index()
         # Default to roidb handler
@@ -53,15 +54,9 @@ def __init__(self, image_set, year, devkit_path=None):
                        'rpn_file': None,
                        'min_size': 2}
 
-        assert(
-            os.path.exists(self._devkit_path),
-            'VOCdevkit path does not exist: {}'.format(self._devkit_path)
-        )
+        assert os.path.exists(self._devkit_path), 'VOCdevkit path does not exist: {}'.format(self._devkit_path)
 
-        assert(
-            os.path.exists(self._data_path),
-            'Path does not exist: {}'.format(self._data_path)
-        )
+        assert os.path.exists(self._data_path), 'Path does not exist: {}'.format(self._data_path)
 
     def image_path_at(self, i):
         """
@@ -75,10 +70,7 @@ def image_path_from_index(self, index):
         """
         image_path = os.path.join(self._data_path, 'JPEGImages',
                                   index + self._image_ext)
-        assert(
-            os.path.exists(image_path),
-            'Path does not exist: {}'.format(image_path)
-        )
+        assert os.path.exists(image_path), 'Path does not exist: {}'.format(image_path)
         return image_path
 
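**Note:** on the `xrange` → `range` rewrites: `six.moves.range` aliases `xrange` on Python 2 and the built-in `range` on Python 3, so it is lazy under both interpreters; eager consumers such as `dict(zip(...))` in `_class_to_ind` above are unaffected, but code that needs a real list must wrap it in `list()`. A small illustration with a hypothetical subset of the class names:

```python
from six.moves import range  # xrange on Py2, built-in range on Py3

classes = ('__background__', 'aeroplane', 'bicycle')  # hypothetical subset
class_to_ind = dict(zip(classes, range(len(classes))))
print(class_to_ind['bicycle'])  # -> 2

# range is lazy on both interpreters; materialize when a list is needed.
inds = list(range(len(classes)))
print(inds + [99])  # list concatenation would fail on a bare range object
```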
@@ -90,10 +82,8 @@ def _load_image_set_index(self):
         # self._devkit_path + /VOCdevkit2007/VOC2007/ImageSets/Main/val.txt
         image_set_file = os.path.join(self._data_path, 'ImageSets', 'Main',
                                       self._image_set + '.txt')
-        assert(
-            os.path.exists(image_set_file),
-            'Path does not exist: {}'.format(image_set_file)
-        )
+
+        assert os.path.exists(image_set_file), 'Path does not exist: {}'.format(image_set_file)
 
         with open(image_set_file) as f:
             image_index = [x.strip() for x in f.readlines()]
@@ -115,7 +105,7 @@ def gt_roidb(self):
         cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
         if os.path.exists(cache_file):
             with open(cache_file, 'rb') as fid:
-                roidb = cPickle.load(fid)
+                roidb = pickle.load(fid)
 
             print('{} gt roidb loaded from {}'.format(self.name, cache_file))
 
@@ -124,7 +114,7 @@ def gt_roidb(self):
         gt_roidb = [self._load_pascal_annotation(index)
                     for index in self.image_index]
         with open(cache_file, 'wb') as fid:
-            cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL)
+            pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL)
 
         print('wrote gt roidb to {}'.format(cache_file))
 
@@ -144,7 +134,7 @@ def selective_search_roidb(self):
 
         if os.path.exists(cache_file):
             with open(cache_file, 'rb') as fid:
-                roidb = cPickle.load(fid)
+                roidb = pickle.load(fid)
 
             print('{} ss roidb loaded from {}'.format(self.name, cache_file))
 
@@ -158,7 +148,7 @@ def selective_search_roidb(self):
             roidb = self._load_selective_search_roidb(None)
 
         with open(cache_file, 'wb') as fid:
-            cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
+            pickle.dump(roidb, fid, pickle.HIGHEST_PROTOCOL)
 
         print('wrote ss roidb to {}'.format(cache_file))
 
@@ -178,13 +168,10 @@ def _load_rpn_roidb(self, gt_roidb):
         filename = self.config['rpn_file']
 
         print('loading {}'.format(filename))
-        assert(
-            os.path.exists(filename),
-            'rpn data not found at: {}'.format(filename)
-        )
+        assert os.path.exists(filename), 'rpn data not found at: {}'.format(filename)
 
         with open(filename, 'rb') as f:
-            box_list = cPickle.load(f)
+            box_list = pickle.load(f)
 
         return self.create_roidb_from_box_list(box_list, gt_roidb)
 
@@ -192,15 +179,13 @@ def _load_selective_search_roidb(self, gt_roidb):
         filename = os.path.abspath(os.path.join(cfg.DATA_DIR,
                                                 'selective_search_data',
                                                 self.name + '.mat'))
-        assert(
-            os.path.exists(filename),
-            'Selective search data not found at: {}'.format(filename)
-        )
+
+        assert os.path.exists(filename), 'Selective search data not found at: {}'.format(filename)
 
         raw_data = sio.loadmat(filename)['boxes'].ravel()
         box_list = []
 
-        for i in xrange(raw_data.shape[0]):
+        for i in range(raw_data.shape[0]):
             boxes = raw_data[i][:, (1, 0, 3, 2)] - 1
             keep = ds_utils.unique_boxes(boxes)
             boxes = boxes[keep, :]
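**Note:** the column shuffle in `_load_selective_search_roidb` above converts MATLAB-style selective search boxes, stored 1-based as (y1, x1, y2, x2), into the 0-based (x1, y1, x2, y2) layout the rest of the code expects. A worked example on a single made-up box:

```python
import numpy as np

# One hypothetical selective-search box in MATLAB order: (y1, x1, y2, x2), 1-based.
raw = np.array([[11, 21, 110, 220]])

# Reorder columns to (x1, y1, x2, y2) and shift to 0-based pixel indices.
boxes = raw[:, (1, 0, 3, 2)] - 1
print(boxes)  # -> [[ 20  10 219 109]]
```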
@@ -297,7 +282,7 @@ def _write_voc_results_file(self, all_boxes):
                         continue
 
                     # the VOCdevkit expects 1-based indices
-                    for k in xrange(dets.shape[0]):
+                    for k in range(dets.shape[0]):
                         f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
                                 format(index, dets[k, -1],
                                        dets[k, 0] + 1, dets[k, 1] + 1,
@@ -339,7 +324,7 @@ def _do_python_eval(self, output_dir='output'):
             print('AP for {} = {:.4f}'.format(cls, ap))
 
             with open(os.path.join(output_dir, cls + '_pr.pkl'), 'w') as f:
-                cPickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f)
+                pickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f)
 
         print('Mean AP = {:.4f}'.format(np.mean(aps)))
         print('~~~~~~~~')
diff --git a/lib/datasets/voc_eval.py b/lib/datasets/voc_eval.py
index 2c9c230b4..9dc0e3c00 100644
--- a/lib/datasets/voc_eval.py
+++ b/lib/datasets/voc_eval.py
@@ -6,7 +6,7 @@
 
 import xml.etree.ElementTree as ET
 import os
-import cPickle
+import six.moves.cPickle as pickle
 import numpy as np
 
@@ -117,11 +117,11 @@ def voc_eval(detpath,
         # save
         print 'Saving cached annotations to {:s}'.format(cachefile)
         with open(cachefile, 'w') as f:
-            cPickle.dump(recs, f)
+            pickle.dump(recs, f)
     else:
         # load
         with open(cachefile, 'r') as f:
-            recs = cPickle.load(f)
+            recs = pickle.load(f)
 
     # extract gt objects for this class
     class_recs = {}
diff --git a/lib/fast_rcnn/config.py b/lib/fast_rcnn/config.py
index 4ac1ba45e..fc2a7af49 100644
--- a/lib/fast_rcnn/config.py
+++ b/lib/fast_rcnn/config.py
@@ -234,7 +234,7 @@ def _merge_a_into_b(a, b):
     if type(a) is not edict:
         return
 
-    for k, v in a.iteritems():
+    for k, v in a.items():
         # a must specify keys that are in b
         if k not in b:
             raise KeyError('{} is not a valid config key'.format(k))
@@ -277,31 +277,30 @@ def cfg_from_list(cfg_list):
     """Set config keys via list (e.g., from command line)."""
     from ast import literal_eval
 
-    assert(len(cfg_list) % 2 == 0)
+    assert len(cfg_list) % 2 == 0
 
     for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
         key_list = k.split('.')
         d = __C
 
         for subkey in key_list[:-1]:
-            assert(subkey in d)
+            assert subkey in d
             d = d[subkey]
 
         subkey = key_list[-1]
-        assert(subkey in d)
+        assert subkey in d
 
         try:
             value = literal_eval(v)
         except:
             # handle the case when v is a string literal
             value = v
 
-        assert(
-            type(value) == type(d[subkey]),
-            'type {} does not match original type {}'.format(
+
+        assert isinstance(value, type(d[subkey])), 'type {} does not match original type {}'.format(
             type(value),
             type(d[subkey])
         )
-        )
+
         d[subkey] = value
diff --git a/lib/fast_rcnn/test.py b/lib/fast_rcnn/test.py
index bd3fe4239..62a616ea1 100644
--- a/lib/fast_rcnn/test.py
+++ b/lib/fast_rcnn/test.py
@@ -15,9 +15,10 @@
 import cv2
 import caffe
 from fast_rcnn.nms_wrapper import nms
-import cPickle
+import six.moves.cPickle as pickle
 from utils.blob import im_list_to_blob
 import os
+from six.moves import range
 
 
 def _get_image_blob(im):
@@ -178,7 +179,7 @@ def im_detect(net, im, boxes=None):
     blobs_out = net.forward(**forward_kwargs)
 
     if cfg.TEST.HAS_RPN:
-        assert(len(im_scales) == 1,) "Only single-image batch implemented"
+        assert len(im_scales) == 1, "Only single-image batch implemented"
         rois = net.blobs['rois'].data.copy()
         # unscale back to raw image space
         boxes = rois[:, 1:5] / im_scales[0]
@@ -213,7 +214,7 @@ def vis_detections(im, class_name, dets, thresh=0.3):
     import matplotlib.pyplot as plt
     im = im[:, :, (2, 1, 0)]
 
-    for i in xrange(np.minimum(10, dets.shape[0])):
+    for i in range(np.minimum(10, dets.shape[0])):
         bbox = dets[i, :4]
         score = dets[i, -1]
 
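**Note:** one porting hazard remains in the `voc_eval.py` hunk above: the annotation cache is still opened in text mode (`'w'`/`'r'`) and the Python 2 `print` statement is untouched, so this module still fails under Python 3, where pickle requires binary file objects. A minimal demonstration of the failure mode and the fix, with a hypothetical file name:

```python
import pickle

recs = {'000001': []}  # hypothetical annotation cache

# On Python 3 this raises TypeError: pickle needs a bytes-mode file.
try:
    with open('/tmp/annots.pkl', 'w') as f:
        pickle.dump(recs, f)
except TypeError as e:
    print(e)

# Binary mode works on both interpreters.
with open('/tmp/annots.pkl', 'wb') as f:
    pickle.dump(recs, f)
with open('/tmp/annots.pkl', 'rb') as f:
    assert pickle.load(f) == recs
```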
@@ -237,11 +238,11 @@ def apply_nms(all_boxes, thresh):
     """
     num_classes = len(all_boxes)
     num_images = len(all_boxes[0])
-    nms_boxes = [[[] for _ in xrange(num_images)]
-                 for _ in xrange(num_classes)]
+    nms_boxes = [[[] for _ in range(num_images)]
+                 for _ in range(num_classes)]
 
-    for cls_ind in xrange(num_classes):
-        for im_ind in xrange(num_images):
+    for cls_ind in range(num_classes):
+        for im_ind in range(num_images):
             dets = all_boxes[cls_ind][im_ind]
 
             if dets == []:
@@ -265,8 +266,8 @@ def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False):
     # all detections are collected into:
     #    all_boxes[cls][image] = N x 5 array of detections in
     #    (x1, y1, x2, y2, score)
-    all_boxes = [[[] for _ in xrange(num_images)]
-                 for _ in xrange(imdb.num_classes)]
+    all_boxes = [[[] for _ in range(num_images)]
+                 for _ in range(imdb.num_classes)]
 
     output_dir = get_output_dir(imdb, net)
 
@@ -276,7 +277,7 @@ def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False):
     if not cfg.TEST.HAS_RPN:
         roidb = imdb.roidb
 
-    for i in xrange(num_images):
+    for i in range(num_images):
         # filter out any ground truth boxes
         if cfg.TEST.HAS_RPN:
             box_proposals = None
@@ -295,7 +296,7 @@ def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False):
         _t['misc'].tic()
 
         # skip j = 0, because it's the background class
-        for j in xrange(1, imdb.num_classes):
+        for j in range(1, imdb.num_classes):
             inds = np.where(scores[:, j] > thresh)[0]
             cls_scores = scores[inds, j]
             cls_boxes = boxes[inds, j*4:(j+1)*4]
@@ -310,10 +311,10 @@ def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False):
         # Limit to max_per_image detections *over all classes*
         if max_per_image > 0:
             image_scores = np.hstack([all_boxes[j][i][:, -1]
-                                      for j in xrange(1, imdb.num_classes)])
+                                      for j in range(1, imdb.num_classes)])
             if len(image_scores) > max_per_image:
                 image_thresh = np.sort(image_scores)[-max_per_image]
-                for j in xrange(1, imdb.num_classes):
+                for j in range(1, imdb.num_classes):
                     keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                     all_boxes[j][i] = all_boxes[j][i][keep, :]
         _t['misc'].toc()
@@ -330,7 +331,7 @@ def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False):
     det_file = os.path.join(output_dir, 'detections.pkl')
 
     with open(det_file, 'wb') as f:
-        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)
+        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
 
     print('Evaluating detections')
     imdb.evaluate_detections(all_boxes, output_dir)
diff --git a/lib/fast_rcnn/train.py b/lib/fast_rcnn/train.py
index cb6f7111d..0d603a811 100644
--- a/lib/fast_rcnn/train.py
+++ b/lib/fast_rcnn/train.py
@@ -31,7 +31,7 @@ def __init__(self, solver_prototxt, roidb, output_dir,
         if (cfg.TRAIN.HAS_RPN and cfg.TRAIN.BBOX_REG and
                 cfg.TRAIN.BBOX_NORMALIZE_TARGETS):
             # RPN can only use precomputed normalization because there are no
             # fixed statistics to compute a priori
-            assert(cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED)
+            assert cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED
 
         if cfg.TRAIN.BBOX_REG:
             print('Computing bounding-box regression targets...')
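**Note:** the `all_boxes` and `nms_boxes` comprehensions above deliberately build an independent empty list per (class, image) cell. The shorter `[[[]] * num_images] * num_classes` would alias a single list everywhere; a quick illustration of why the comprehension form is used:

```python
num_classes, num_images = 2, 3

# Comprehension: every cell is a distinct list.
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
all_boxes[0][0].append('det')
print(all_boxes[1][2])  # -> [] (unaffected)

# Multiplication: all cells share one underlying list object.
aliased = [[[]] * num_images] * num_classes
aliased[0][0].append('det')
print(aliased[1][2])  # -> ['det'] (every cell sees the append)
```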
-        for blob_name, blob in blobs.iteritems():
+        for blob_name, blob in blobs.items():
             top_ind = self._name_to_top_map[blob_name]
             # Reshape net's input blobs
             top[top_ind].reshape(*(blob.shape))
diff --git a/lib/roi_data_layer/minibatch.py b/lib/roi_data_layer/minibatch.py
index 2f3db2fa1..c0569cfb3 100644
--- a/lib/roi_data_layer/minibatch.py
+++ b/lib/roi_data_layer/minibatch.py
@@ -12,6 +12,7 @@
 import cv2
 from fast_rcnn.config import cfg
 from utils.blob import prep_im_for_blob, im_list_to_blob
+from six.moves import range
 
 
 def get_minibatch(roidb, num_classes):
@@ -23,12 +24,9 @@ def get_minibatch(roidb, num_classes):
         size=num_images
     )
 
-    assert(
-        cfg.TRAIN.BATCH_SIZE % num_images == 0,
-        'num_images ({}) must divide BATCH_SIZE ({})'.format(
+    assert (cfg.TRAIN.BATCH_SIZE % num_images == 0), 'num_images ({}) must divide BATCH_SIZE ({})'.format(
         num_images,
         cfg.TRAIN.BATCH_SIZE
     )
-    )
 
     rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images
     fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)
@@ -39,8 +37,9 @@ def get_minibatch(roidb, num_classes):
     blobs = {'data': im_blob}
 
     if cfg.TRAIN.HAS_RPN:
-        assert(len(im_scales) == 1,) "Single batch only"
-        assert(len(roidb) == 1,) "Single batch only"
+        assert len(im_scales) == 1, "Single batch only"
+        assert len(roidb) == 1, "Single batch only"
+
         # gt boxes: (x1, y1, x2, y2, cls)
         gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0]
         gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
@@ -59,7 +58,7 @@ def get_minibatch(roidb, num_classes):
         bbox_inside_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32)
         # all_overlaps = []
 
-        for im_i in xrange(num_images):
+        for im_i in range(num_images):
             labels, overlaps, im_rois, bbox_targets, bbox_inside_weights \
                 = _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image,
                                num_classes)
@@ -149,7 +148,7 @@ def _get_image_blob(roidb, scale_inds):
     num_images = len(roidb)
     processed_ims = []
     im_scales = []
-    for i in xrange(num_images):
+    for i in range(num_images):
         im = cv2.imread(roidb[i]['image'])
 
         if roidb[i]['flipped']:
@@ -204,7 +203,7 @@ def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps):
     Visualize a mini-batch for debugging.
     """
     import matplotlib.pyplot as plt
-    for i in xrange(rois_blob.shape[0]):
+    for i in range(rois_blob.shape[0]):
         rois = rois_blob[i, :]
         im_ind = rois[0]
         roi = rois[1:]
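**Note:** worth flagging for the `minibatch.py` hunk above: under Python 3, `cfg.TRAIN.BATCH_SIZE / num_images` is true division and yields a float even though the preceding assert guarantees divisibility, so anything downstream that expects an integer count would need a cast. Floor division sidesteps this on both interpreters; a small sketch with hypothetical config values:

```python
batch_size, num_images = 128, 2  # hypothetical config values

assert batch_size % num_images == 0, 'num_images must divide BATCH_SIZE'

rois_per_image = batch_size / num_images
print(rois_per_image)   # Python 3: 64.0 (float); Python 2: 64 (int)

rois_per_image = batch_size // num_images  # integer on both interpreters
print(rois_per_image)   # -> 64
```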
""" sizes = [PIL.Image.open(imdb.image_path_at(i)).size - for i in xrange(imdb.num_images)] + for i in range(imdb.num_images)] roidb = imdb.roidb - for i in xrange(len(imdb.image_index)): + for i in range(len(imdb.image_index)): roidb[i]['image'] = imdb.image_path_at(i) roidb[i]['width'] = sizes[i][0] roidb[i]['height'] = sizes[i][1] @@ -42,23 +43,23 @@ def prepare_roidb(imdb): # sanity checks # max overlap of 0 => class should be zero (background) zero_inds = np.where(max_overlaps == 0)[0] - assert(all(max_classes[zero_inds] == 0)) + assert all(max_classes[zero_inds] == 0) # max overlap > 0 => class should not be zero (must be a fg class) nonzero_inds = np.where(max_overlaps > 0)[0] - assert(all(max_classes[nonzero_inds] != 0)) + assert all(max_classes[nonzero_inds] != 0) def add_bbox_regression_targets(roidb): """ Add information needed to train bounding-box regressors. """ - assert(len(roidb) > 0) - assert('max_classes' in roidb[0], 'Did you call prepare_roidb first?') + assert len(roidb) > 0 + assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?' num_images = len(roidb) # Infer number of classes from the number of columns in gt_overlaps num_classes = roidb[0]['gt_overlaps'].shape[1] - for im_i in xrange(num_images): + for im_i in range(num_images): rois = roidb[im_i]['boxes'] max_overlaps = roidb[im_i]['max_overlaps'] max_classes = roidb[im_i]['max_classes'] @@ -80,9 +81,9 @@ def add_bbox_regression_targets(roidb): class_counts = np.zeros((num_classes, 1)) + cfg.EPS sums = np.zeros((num_classes, 4)) squared_sums = np.zeros((num_classes, 4)) - for im_i in xrange(num_images): + for im_i in range(num_images): targets = roidb[im_i]['bbox_targets'] - for cls in xrange(1, num_classes): + for cls in range(1, num_classes): cls_inds = np.where(targets[:, 0] == cls)[0] if cls_inds.size > 0: class_counts[cls] += cls_inds.size @@ -102,9 +103,9 @@ def add_bbox_regression_targets(roidb): # Normalize targets if cfg.TRAIN.BBOX_NORMALIZE_TARGETS: print("Normalizing targets") - for im_i in xrange(num_images): + for im_i in range(num_images): targets = roidb[im_i]['bbox_targets'] - for cls in xrange(1, num_classes): + for cls in range(1, num_classes): cls_inds = np.where(targets[:, 0] == cls)[0] roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :] roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :] diff --git a/lib/rpn/anchor_target_layer.py b/lib/rpn/anchor_target_layer.py index e5463fb39..21ce5639d 100644 --- a/lib/rpn/anchor_target_layer.py +++ b/lib/rpn/anchor_target_layer.py @@ -25,7 +25,7 @@ class AnchorTargetLayer(caffe.Layer): """ def setup(self, bottom, top): - layer_params = yaml.load(self.param_str_) + layer_params = yaml.load(self.param_str) anchor_scales = layer_params.get('scales', (8, 16, 32)) self._anchors = generate_anchors(scales=np.array(anchor_scales)) self._num_anchors = self._anchors.shape[0] @@ -75,10 +75,7 @@ def forward(self, bottom, top): # filter out-of-image anchors # measure GT overlap - assert( - bottom[0].data.shape[0] == 1, - 'Only single item batches are supported' - ) + assert bottom[0].data.shape[0] == 1, 'Only single item batches are supported' # map of shape (..., H, W) height, width = bottom[0].data.shape[-2:] @@ -190,10 +187,9 @@ def forward(self, bottom, top): positive_weights = np.ones((1, 4)) * 1.0 / num_examples negative_weights = np.ones((1, 4)) * 1.0 / num_examples else: - assert( - (cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & - (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1) - ) + assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & + 
+            assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
+                    (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
+
             positive_weights = (
                 cfg.TRAIN.RPN_POSITIVE_WEIGHT / np.sum(labels == 1)
             )
@@ -254,8 +250,8 @@ def forward(self, bottom, top):
             (1, height, width, A * 4)
         ).transpose(0, 3, 1, 2)
 
-        assert(bbox_inside_weights.shape[2] == height)
-        assert(bbox_inside_weights.shape[3] == width)
+        assert bbox_inside_weights.shape[2] == height
+        assert bbox_inside_weights.shape[3] == width
 
         top[2].reshape(*bbox_inside_weights.shape)
         top[2].data[...] = bbox_inside_weights
@@ -264,8 +260,8 @@ def forward(self, bottom, top):
             (1, height, width, A * 4)
         ).transpose(0, 3, 1, 2)
 
-        assert(bbox_outside_weights.shape[2] == height)
-        assert(bbox_outside_weights.shape[3] == width)
+        assert bbox_outside_weights.shape[2] == height
+        assert bbox_outside_weights.shape[3] == width
 
         top[3].reshape(*bbox_outside_weights.shape)
         top[3].data[...] = bbox_outside_weights
@@ -303,9 +299,9 @@ def _compute_targets(ex_rois, gt_rois):
     Compute bounding-box regression targets for an image.
     """
 
-    assert(ex_rois.shape[0] == gt_rois.shape[0])
-    assert(ex_rois.shape[1] == 4)
-    assert(gt_rois.shape[1] == 5)
+    assert ex_rois.shape[0] == gt_rois.shape[0]
+    assert ex_rois.shape[1] == 4
+    assert gt_rois.shape[1] == 5
 
     return bbox_transform(
         ex_rois, gt_rois[:, :4]
diff --git a/lib/rpn/generate.py b/lib/rpn/generate.py
index 177d9a678..2894a6ed3 100644
--- a/lib/rpn/generate.py
+++ b/lib/rpn/generate.py
@@ -10,6 +10,7 @@
 from utils.timer import Timer
 import numpy as np
 import cv2
+from six.moves import range
 
 
 def _vis_proposals(im, dets, thresh=0.5):
@@ -68,7 +69,7 @@ def _get_image_blob(im):
 
     processed_ims = []
 
-    assert(len(cfg.TEST.SCALES) == 1)
+    assert len(cfg.TEST.SCALES) == 1
     target_size = cfg.TEST.SCALES[0]
 
     im_scale = float(target_size) / float(im_size_min)
@@ -110,8 +111,8 @@ def imdb_proposals(net, imdb):
     """Generate RPN proposals on all images in an imdb."""
 
     _t = Timer()
-    imdb_boxes = [[] for _ in xrange(imdb.num_images)]
-    for i in xrange(imdb.num_images):
+    imdb_boxes = [[] for _ in range(imdb.num_images)]
+    for i in range(imdb.num_images):
         im = cv2.imread(imdb.image_path_at(i))
         _t.tic()
         imdb_boxes[i], scores = im_proposals(net, im)
diff --git a/lib/rpn/generate_anchors.py b/lib/rpn/generate_anchors.py
index d4c90e9f2..48d899387 100644
--- a/lib/rpn/generate_anchors.py
+++ b/lib/rpn/generate_anchors.py
@@ -6,6 +6,7 @@
 # --------------------------------------------------------
 
 import numpy as np
+from six.moves import range
 
 # Verify that we compute the same anchors as Shaoqing's matlab implementation:
 #
@@ -46,7 +47,7 @@ def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
     ratio_anchors = _ratio_enum(base_anchor, ratios)
     anchors = np.vstack(
         [_scale_enum(ratio_anchors[i, :], scales)
-         for i in xrange(ratio_anchors.shape[0])]
+         for i in range(ratio_anchors.shape[0])]
     )
     return anchors
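**Note:** for reference on the `generate_anchors` changes above: with its defaults (base_size=16, three ratios, three scales) the function returns a (9, 4) array of (x1, y1, x2, y2) anchors centered on a single 16×16 cell, which the RPN then slides across the feature map. A usage sketch, assuming it is run from `lib/` so the `rpn` package is importable:

```python
import numpy as np
from rpn.generate_anchors import generate_anchors

anchors = generate_anchors(base_size=16,
                           ratios=[0.5, 1, 2],
                           scales=np.array([8, 16, 32]))
print(anchors.shape)  # -> (9, 4): 3 aspect ratios x 3 scales
print(anchors[4])     # the square ratio-1, scale-16 anchor (256x256 pixels)
```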
""" + def get_phase(self): + if self.phase == 0: + return 'TRAIN' + elif self.phase == 1: + return 'TEST' + else: + raise ValueError("Unkown Phase") + def setup(self, bottom, top): # parse the layer parameter string, which must be valid YAML - layer_params = yaml.load(self.param_str_) + layer_params = yaml.load(self.param_str) self._feat_stride = layer_params['feat_stride'] anchor_scales = layer_params.get('scales', (8, 16, 32)) @@ -59,12 +67,9 @@ def forward(self, bottom, top): # take after_nms_topN proposals after NMS # return the top proposals (-> RoIs top, scores top) - assert( - bottom[0].data.shape[0] == 1, - 'Only single item batches are supported' - ) + assert bottom[0].data.shape[0] == 1, 'Only single item batches are supported' - cfg_key = str(self.phase) # either 'TRAIN' or 'TEST' + cfg_key = self.get_phase() # either 'TRAIN' or 'TEST' pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N nms_thresh = cfg[cfg_key].RPN_NMS_THRESH diff --git a/lib/rpn/proposal_target_layer.py b/lib/rpn/proposal_target_layer.py index b7440161b..6ff6ea4da 100644 --- a/lib/rpn/proposal_target_layer.py +++ b/lib/rpn/proposal_target_layer.py @@ -23,7 +23,7 @@ class ProposalTargetLayer(caffe.Layer): """ def setup(self, bottom, top): - layer_params = yaml.load(self.param_str_) + layer_params = yaml.load(self.param_str) self._num_classes = layer_params['num_classes'] # sampled rois (0, x1, y1, x2, y2) @@ -53,10 +53,7 @@ def forward(self, bottom, top): ) # Sanity check: single batch only - assert( - np.all(all_rois[:, 0] == 0), - 'Only single item batches are supported' - ) + assert np.all(all_rois[:, 0] == 0), 'Only single item batches are supported' num_images = 1 rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images @@ -143,9 +140,9 @@ def _compute_targets(ex_rois, gt_rois, labels): Compute bounding-box regression targets for an image. 
""" - assert(ex_rois.shape[0] == gt_rois.shape[0]) - assert(ex_rois.shape[1] == 4) - assert(gt_rois.shape[1] == 4) + assert ex_rois.shape[0] == gt_rois.shape[0] + assert ex_rois.shape[1] == 4 + assert gt_rois.shape[1] == 4 targets = bbox_transform(ex_rois, gt_rois) if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: diff --git a/lib/setup.py b/lib/setup.py index c7b88d8e5..b6a73dce2 100644 --- a/lib/setup.py +++ b/lib/setup.py @@ -56,7 +56,7 @@ def locate_cuda(): 'lib64': pjoin(home, 'lib64') } - for k, v in cudaconfig.iteritems(): + for k, v in cudaconfig.items(): if not os.path.exists(v): raise EnvironmentError( 'The CUDA %s path could not be located in %s' % (k, v) diff --git a/lib/utils/blob.py b/lib/utils/blob.py index adab9df44..738dc1602 100644 --- a/lib/utils/blob.py +++ b/lib/utils/blob.py @@ -9,6 +9,7 @@ import numpy as np import cv2 +from six.moves import range def im_list_to_blob(ims): @@ -20,7 +21,7 @@ def im_list_to_blob(ims): num_images = len(ims) blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), dtype=np.float32) - for i in xrange(num_images): + for i in range(num_images): im = ims[i] blob[i, 0:im.shape[0], 0:im.shape[1], :] = im # Move channels (axis 3) to axis 1 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..ea1584879 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +Cython>=0.21 +easydict>=1.6 \ No newline at end of file diff --git a/tools/compress_net.py b/tools/compress_net.py index f4d25deee..53938504c 100755 --- a/tools/compress_net.py +++ b/tools/compress_net.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -------------------------------------------------------- # Fast R-CNN diff --git a/tools/demo.py b/tools/demo.py index d27cf8035..f984fe703 100755 --- a/tools/demo.py +++ b/tools/demo.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -------------------------------------------------------- # Faster R-CNN @@ -18,6 +18,7 @@ from fast_rcnn.test import im_detect from fast_rcnn.nms_wrapper import nms from utils.timer import Timer +from six.moves import range import matplotlib.pyplot as plt import numpy as np import scipy.io as sio @@ -85,7 +86,9 @@ def vis_detections(im, class_name, dets, thresh=0.5): def demo(net, image_name): - """Detect object classes in an image using pre-computed object proposals.""" + """ + Detect object classes in an image using pre-computed object proposals. 
+ """ # Load the demo image im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name) @@ -166,7 +169,7 @@ def parse_args(): # Warmup on a dummy image im = 128 * np.ones((300, 500, 3), dtype=np.uint8) - for i in xrange(2): + for i in range(2): _, _ = im_detect(net, im) im_names = [ diff --git a/tools/eval_recall.py b/tools/eval_recall.py index 61e25bb9f..4cefc34ff 100755 --- a/tools/eval_recall.py +++ b/tools/eval_recall.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import _init_paths from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list @@ -64,7 +64,7 @@ def parse_args(): def recall_at(t): ind = np.where(thresholds > t - 1e-5)[0][0] - assert(np.isclose(thresholds[ind], t)) + assert np.isclose(thresholds[ind], t) return recalls[ind] diff --git a/tools/reval.py b/tools/reval.py index 7b3dc95f1..0f6233d8e 100755 --- a/tools/reval.py +++ b/tools/reval.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -------------------------------------------------------- # Fast R-CNN diff --git a/tools/rpn_generate.py b/tools/rpn_generate.py index 16e942c65..8e5f39645 100755 --- a/tools/rpn_generate.py +++ b/tools/rpn_generate.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -------------------------------------------------------- # Fast/er/ R-CNN diff --git a/tools/test_net.py b/tools/test_net.py index a4dc8a19a..73c5107ca 100755 --- a/tools/test_net.py +++ b/tools/test_net.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -------------------------------------------------------- # Fast R-CNN diff --git a/tools/train_faster_rcnn_alt_opt.py b/tools/train_faster_rcnn_alt_opt.py index 9fa9d277d..3e2b03a53 100755 --- a/tools/train_faster_rcnn_alt_opt.py +++ b/tools/train_faster_rcnn_alt_opt.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -------------------------------------------------------- # Faster R-CNN diff --git a/tools/train_net.py b/tools/train_net.py index 7f9c2c0a6..b991a210a 100755 --- a/tools/train_net.py +++ b/tools/train_net.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -------------------------------------------------------- # Fast R-CNN diff --git a/tools/train_svms.py b/tools/train_svms.py index 65832c194..e1802385b 100755 --- a/tools/train_svms.py +++ b/tools/train_svms.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -------------------------------------------------------- # Fast R-CNN @@ -26,6 +26,7 @@ from sklearn import svm import os import sys +from six.moves import range class SVMTrainer(object): @@ -55,7 +56,7 @@ def _get_feature_scale(self, num_images=100): total_norm = 0.0 count = 0.0 inds = npr.choice( - xrange(self.imdb.num_images), size=num_images, + range(self.imdb.num_images), size=num_images, replace=False ) @@ -83,12 +84,12 @@ def _get_pos_counts(self): counts = np.zeros((len(self.imdb.classes)), dtype=np.int) roidb = self.imdb.roidb - for i in xrange(len(roidb)): - for j in xrange(1, self.imdb.num_classes): + for i in range(len(roidb)): + for j in range(1, self.imdb.num_classes): I = np.where(roidb[i]['gt_classes'] == j)[0] counts[j] += len(I) - for j in xrange(1, self.imdb.num_classes): + for j in range(1, self.imdb.num_classes): print('class {:s} has {:d} positives'.format( self.imdb.classes[j], counts[j] )) @@ -98,14 +99,14 @@ def _get_pos_counts(self): def get_pos_examples(self): counts = self._get_pos_counts() - for i in xrange(len(counts)): + for i in range(len(counts)): self.trainers[i].alloc_pos(counts[i]) _t = Timer() roidb = 
diff --git a/tools/train_svms.py b/tools/train_svms.py
index 65832c194..e1802385b 100755
--- a/tools/train_svms.py
+++ b/tools/train_svms.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # --------------------------------------------------------
 # Fast R-CNN
@@ -26,6 +26,7 @@
 from sklearn import svm
 import os
 import sys
+from six.moves import range
 
 
 class SVMTrainer(object):
@@ -55,7 +56,7 @@ def _get_feature_scale(self, num_images=100):
         total_norm = 0.0
         count = 0.0
         inds = npr.choice(
-            xrange(self.imdb.num_images), size=num_images,
+            range(self.imdb.num_images), size=num_images,
             replace=False
         )
 
@@ -83,12 +84,12 @@ def _get_pos_counts(self):
         counts = np.zeros((len(self.imdb.classes)), dtype=np.int)
         roidb = self.imdb.roidb
 
-        for i in xrange(len(roidb)):
-            for j in xrange(1, self.imdb.num_classes):
+        for i in range(len(roidb)):
+            for j in range(1, self.imdb.num_classes):
                 I = np.where(roidb[i]['gt_classes'] == j)[0]
                 counts[j] += len(I)
 
-        for j in xrange(1, self.imdb.num_classes):
+        for j in range(1, self.imdb.num_classes):
             print('class {:s} has {:d} positives'.format(
                 self.imdb.classes[j], counts[j]
             ))
@@ -98,14 +99,14 @@ def _get_pos_counts(self):
     def get_pos_examples(self):
         counts = self._get_pos_counts()
 
-        for i in xrange(len(counts)):
+        for i in range(len(counts)):
             self.trainers[i].alloc_pos(counts[i])
 
         _t = Timer()
         roidb = self.imdb.roidb
         num_images = len(roidb)
         # num_images = 100
-        for i in xrange(num_images):
+        for i in range(num_images):
             im = cv2.imread(self.imdb.image_path_at(i))
 
             if roidb[i]['flipped']:
@@ -118,7 +119,7 @@ def get_pos_examples(self):
             _t.toc()
             feat = self.net.blobs[self.layer].data
 
-            for j in xrange(1, self.imdb.num_classes):
+            for j in range(1, self.imdb.num_classes):
                 cls_inds = np.where(roidb[i]['gt_classes'][gt_inds] == j)[0]
                 if len(cls_inds) > 0:
                     cls_feat = feat[cls_inds, :]
@@ -155,7 +156,7 @@ def train_with_hard_negatives(self):
         num_images = len(roidb)
         # num_images = 100
 
-        for i in xrange(num_images):
+        for i in range(num_images):
             im = cv2.imread(self.imdb.image_path_at(i))
 
             if roidb[i]['flipped']:
@@ -165,7 +166,7 @@ def train_with_hard_negatives(self):
             scores, boxes = im_detect(self.net, im, roidb[i]['boxes'])
             _t.toc()
             feat = self.net.blobs[self.layer].data
-            for j in xrange(1, self.imdb.num_classes):
+            for j in range(1, self.imdb.num_classes):
                 hard_inds = np.where(
                     (scores[:, j] > self.hard_thresh) &
                     (roidb[i]['gt_overlaps'][:, j].toarray().ravel() <
                      self.neg_iou_thresh)
@@ -208,7 +209,7 @@ def train(self):
         # One final SVM retraining for each class
         # Install SVMs into net
 
-        for j in xrange(1, self.imdb.num_classes):
+        for j in range(1, self.imdb.num_classes):
             new_w_b = self.trainers[j].append_neg_and_retrain(force=True)
             self.update_net(j, new_w_b[0], new_w_b[1])
 
@@ -282,11 +283,10 @@ def train(self):
 
         # Sanity check
         scores_ret = (
-            X * 1.0 / self.feature_scale).dot(w.T * self.feature_scale) + b
-        assert(
-            np.allclose(scores, scores_ret[:, 0], atol=1e-5),
-            "Scores from returned model don't match decision function"
-        )
+            X * 1.0 / self.feature_scale
+        ).dot(w.T * self.feature_scale) + b
+
+        assert np.allclose(scores, scores_ret[:, 0], atol=1e-5), "Scores from returned model don't match decision function"
 
         return ((w * self.feature_scale, b), pos_scores, neg_scores)
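**Note:** a closing remark on the SVM sanity check above: the trainer learns on features divided by `feature_scale` and returns weights multiplied by the same scale, so the two scalings cancel in the decision function; the assert verifies exactly that cancellation. The algebra in isolation, with made-up numbers:

```python
import numpy as np

s = 0.5                                   # hypothetical feature scale
X = np.array([[1.0, 2.0], [3.0, 4.0]])    # hypothetical feature rows
w = np.array([[0.2, -0.1]])               # hypothetical SVM weights
b = 0.05

scores = X.dot(w.T) + b                              # plain decision function
scores_ret = (X * 1.0 / s).dot(w.T * s) + b          # scale down X, scale up w

assert np.allclose(scores, scores_ret, atol=1e-5)    # the factors of s cancel
```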