From 1f244d81be4511d24745e7fb9d31278804f70222 Mon Sep 17 00:00:00 2001 From: Hugo Serrat Date: Wed, 18 May 2016 17:04:47 +0200 Subject: [PATCH 1/3] Update code to support python 3 --- lib/datasets/coco.py | 142 +++++++--- lib/datasets/ds_utils.py | 8 +- lib/datasets/factory.py | 15 +- lib/datasets/imdb.py | 75 +++-- lib/datasets/pascal_voc.py | 148 +++++++--- lib/datasets/tools/mcg_munge.py | 10 +- lib/datasets/voc_eval.py | 4 + lib/fast_rcnn/bbox_transform.py | 5 +- lib/fast_rcnn/config.py | 58 ++-- lib/fast_rcnn/nms_wrapper.py | 3 +- lib/fast_rcnn/test.py | 69 ++++- lib/fast_rcnn/train.py | 92 ++++--- lib/roi_data_layer/layer.py | 28 +- lib/roi_data_layer/minibatch.py | 60 ++-- lib/roi_data_layer/roidb.py | 53 ++-- lib/rpn/anchor_target_layer.py | 143 ++++++---- lib/rpn/generate.py | 33 ++- lib/rpn/generate_anchors.py | 25 +- lib/rpn/proposal_layer.py | 53 ++-- lib/rpn/proposal_target_layer.py | 67 +++-- lib/setup.py | 48 ++-- lib/transform/torch_image_transform_layer.py | 1 + lib/utils/blob.py | 2 + lib/utils/timer.py | 2 + tools/compress_net.py | 50 ++-- tools/demo.py | 101 ++++--- tools/eval_recall.py | 62 +++-- tools/reval.py | 45 ++- tools/rpn_generate.py | 59 ++-- tools/test_net.py | 78 ++++-- tools/train_faster_rcnn_alt_opt.py | 274 ++++++++++++------- tools/train_net.py | 86 +++--- tools/train_svms.py | 158 +++++++---- 33 files changed, 1361 insertions(+), 696 deletions(-) diff --git a/lib/datasets/coco.py b/lib/datasets/coco.py index bfe8ff3d7..0815c696b 100644 --- a/lib/datasets/coco.py +++ b/lib/datasets/coco.py @@ -21,6 +21,7 @@ from pycocotools.cocoeval import COCOeval from pycocotools import mask as COCOmask + def _filter_crowd_proposals(roidb, crowd_thresh): """ Finds proposals that are inside crowd regions and marks them with @@ -31,8 +32,10 @@ def _filter_crowd_proposals(roidb, crowd_thresh): overlaps = entry['gt_overlaps'].toarray() crowd_inds = np.where(overlaps.max(axis=1) == -1)[0] non_gt_inds = np.where(entry['gt_classes'] == 0)[0] + if len(crowd_inds) == 0 or len(non_gt_inds) == 0: continue + iscrowd = [int(True) for _ in xrange(len(crowd_inds))] crowd_boxes = ds_utils.xyxy_to_xywh(entry['boxes'][crowd_inds, :]) non_gt_boxes = ds_utils.xyxy_to_xywh(entry['boxes'][non_gt_inds, :]) @@ -40,17 +43,20 @@ def _filter_crowd_proposals(roidb, crowd_thresh): bad_inds = np.where(ious.max(axis=1) > crowd_thresh)[0] overlaps[non_gt_inds[bad_inds], :] = -1 roidb[ix]['gt_overlaps'] = scipy.sparse.csr_matrix(overlaps) + return roidb + class coco(imdb): + def __init__(self, image_set, year): imdb.__init__(self, 'coco_' + year + '_' + image_set) # COCO specific config options - self.config = {'top_k' : 2000, - 'use_salt' : True, - 'cleanup' : True, - 'crowd_thresh' : 0.7, - 'min_size' : 2} + self.config = {'top_k': 2000, + 'use_salt': True, + 'cleanup': True, + 'crowd_thresh': 0.7, + 'min_size': 2} # name, paths self._year = year self._image_set = image_set @@ -71,20 +77,23 @@ def __init__(self, image_set, year): # For example, minival2014 is a random 5000 image subset of val2014. # This mapping tells us where the view's images and proposals come from. 
self._view_map = { - 'minival2014' : 'val2014', # 5k val2014 subset - 'valminusminival2014' : 'val2014', # val2014 \setminus minival2014 + 'minival2014': 'val2014', # 5k val2014 subset + 'valminusminival2014': 'val2014', # val2014 \setminus minival2014 } coco_name = image_set + year # e.g., "val2014" self._data_name = (self._view_map[coco_name] - if self._view_map.has_key(coco_name) + if coco_name in self._view_map else coco_name) + # Dataset splits that have ground-truth annotations (test splits # do not have gt annotations) self._gt_splits = ('train', 'val', 'minival') def _get_ann_file(self): - prefix = 'instances' if self._image_set.find('test') == -1 \ - else 'image_info' + prefix = 'instances' + if self._image_set.find('test') != -1: + prefix = 'image_info' + return osp.join(self._data_path, 'annotations', prefix + '_' + self._image_set + self._year + '.json') @@ -98,6 +107,7 @@ def _load_image_set_index(self): def _get_widths(self): anns = self._COCO.loadImgs(self._image_index) widths = [ann['width'] for ann in anns] + return widths def image_path_at(self, i): @@ -116,8 +126,11 @@ def image_path_from_index(self, index): str(index).zfill(12) + '.jpg') image_path = osp.join(self._data_path, 'images', self._data_name, file_name) assert osp.exists(image_path), \ 'Path does not exist: {}'.format(image_path) + return image_path def selective_search_roidb(self): @@ -141,21 +154,30 @@ def _roidb_from_proposals(self, method): if osp.exists(cache_file): with open(cache_file, 'rb') as fid: roidb = cPickle.load(fid) - print '{:s} {:s} roidb loaded from {:s}'.format(self.name, method, - cache_file) + + print('{:s} {:s} roidb loaded from {:s}'.format( + self.name, + method, + cache_file + ) + ) + return roidb if self._image_set in self._gt_splits: gt_roidb = self.gt_roidb() method_roidb = self._load_proposals(method, gt_roidb) roidb = imdb.merge_roidbs(gt_roidb, method_roidb) + # Make sure we don't use proposals that are contained in crowds roidb = _filter_crowd_proposals(roidb, self.config['crowd_thresh']) else: roidb = self._load_proposals(method, None) with open(cache_file, 'wb') as fid: cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL) - print 'wrote {:s} roidb to {:s}'.format(method, cache_file) + + print('wrote {:s} roidb to {:s}'.format(method, cache_file)) + return roidb def _load_proposals(self, method, gt_roidb): @@ -175,12 +197,13 @@ def _load_proposals(self, method, gt_roidb): 'selective_search', 'edge_boxes_AR', 'edge_boxes_70'] - assert method in valid_methods + assert(method in valid_methods) + + print('Loading {} boxes'.format(method)) - print 'Loading {} boxes'.format(method) for i, index in enumerate(self._image_index): if i % 1000 == 0: - print '{:d} / {:d}'.format(i + 1, len(self._image_index)) + print('{:d} / {:d}'.format(i + 1, len(self._image_index))) box_file = osp.join( cfg.DATA_DIR, 'coco_proposals', method, 'mat', @@ -188,9 +211,11 @@ def _load_proposals(self, method, gt_roidb): raw_data = sio.loadmat(box_file)['boxes'] boxes = np.maximum(raw_data - 1, 0).astype(np.uint16) + if method == 'MCG': # Boxes from the MCG website are in (y1, x1, y2, x2) order boxes = boxes[:, (1, 0, 3, 2)] + # Remove duplicate boxes and very small boxes and then take top k keep = ds_utils.unique_boxes(boxes) boxes = boxes[keep, :] @@ -203,6 +228,7 @@ def _load_proposals(self, method, gt_roidb): width = im_ann['width'] height = im_ann['height'] ds_utils.validate_boxes(boxes, width=width, height=height) + 
return self.create_roidb_from_box_list(box_list, gt_roidb) def gt_roidb(self): @@ -214,7 +240,9 @@ def gt_roidb(self): if osp.exists(cache_file): with open(cache_file, 'rb') as fid: roidb = cPickle.load(fid) - print '{} gt roidb loaded from {}'.format(self.name, cache_file) + + print('{} gt roidb loaded from {}'.format(self.name, cache_file)) + return roidb gt_roidb = [self._load_coco_annotation(index) @@ -222,7 +250,9 @@ def gt_roidb(self): with open(cache_file, 'wb') as fid: cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL) - print 'wrote gt roidb to {}'.format(cache_file) + + print('wrote gt roidb to {}'.format(cache_file)) + return gt_roidb def _load_coco_annotation(self, index): @@ -266,6 +296,7 @@ def _load_coco_annotation(self, index): boxes[ix, :] = obj['clean_bbox'] gt_classes[ix] = cls seg_areas[ix] = obj['area'] + if obj['iscrowd']: # Set overlap to -1 for all classes for crowd objects # so they will be excluded during training @@ -275,27 +306,31 @@ def _load_coco_annotation(self, index): ds_utils.validate_boxes(boxes, width=width, height=height) overlaps = scipy.sparse.csr_matrix(overlaps) - return {'boxes' : boxes, + + return {'boxes': boxes, 'gt_classes': gt_classes, - 'gt_overlaps' : overlaps, - 'flipped' : False, - 'seg_areas' : seg_areas} + 'gt_overlaps': overlaps, + 'flipped': False, + 'seg_areas': seg_areas} def _get_box_file(self, index): # first 14 chars / first 22 chars / all chars + .mat # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat file_name = ('COCO_' + self._data_name + '_' + str(index).zfill(12) + '.mat') + return osp.join(file_name[:14], file_name[:22], file_name) def _print_detection_eval_metrics(self, coco_eval): IoU_lo_thresh = 0.5 IoU_hi_thresh = 0.95 + def _get_thr_ind(coco_eval, thr): ind = np.where((coco_eval.params.iouThrs > thr - 1e-5) & (coco_eval.params.iouThrs < thr + 1e-5))[0][0] iou_thr = coco_eval.params.iouThrs[ind] - assert np.isclose(iou_thr, thr) + assert(np.isclose(iou_thr, thr)) + return ind ind_lo = _get_thr_ind(coco_eval, IoU_lo_thresh) @@ -303,21 +338,28 @@ def _get_thr_ind(coco_eval, thr): # precision has dims (iou, recall, cls, area range, max dets) # area range index 0: all area ranges # max dets index 2: 100 per image - precision = \ - coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2] + precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, :, 0, 2] ap_default = np.mean(precision[precision > -1]) - print ('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] ' - '~~~~').format(IoU_lo_thresh, IoU_hi_thresh) - print '{:.1f}'.format(100 * ap_default) + + print(('~~~~ Mean and per-category AP @ IoU=[{:.2f},{:.2f}] ' + '~~~~').format(IoU_lo_thresh, IoU_hi_thresh)) + print('{:.1f}'.format(100 * ap_default)) + for cls_ind, cls in enumerate(self.classes): + if cls == '__background__': continue # minus 1 because of __background__ - precision = coco_eval.eval['precision'][ind_lo:(ind_hi + 1), :, cls_ind - 1, 0, 2] + + precision = coco_eval.eval['precision'][ + ind_lo:(ind_hi + 1), :, cls_ind - 1, 0, 2 + ] + ap = np.mean(precision[precision > -1]) - print '{:.1f}'.format(100 * ap) - print '~~~~ Summary metrics ~~~~' + print('{:.1f}'.format(100 * ap)) + + print('~~~~ Summary metrics ~~~~') coco_eval.summarize() def _do_detection_eval(self, res_file, output_dir): @@ -329,9 +371,11 @@ def _do_detection_eval(self, res_file, output_dir): coco_eval.accumulate() self._print_detection_eval_metrics(coco_eval) eval_file = osp.join(output_dir, 'detection_results.pkl') + with open(eval_file, 'wb') as fid: 
cPickle.dump(coco_eval, fid, cPickle.HIGHEST_PROTOCOL) - print 'Wrote COCO eval results to: {}'.format(eval_file) + + print('Wrote COCO eval results to: {}'.format(eval_file)) def _coco_results_one_category(self, boxes, cat_id): results = [] @@ -345,10 +389,11 @@ def _coco_results_one_category(self, boxes, cat_id): ws = dets[:, 2] - xs + 1 hs = dets[:, 3] - ys + 1 results.extend( - [{'image_id' : index, - 'category_id' : cat_id, - 'bbox' : [xs[k], ys[k], ws[k], hs[k]], - 'score' : scores[k]} for k in xrange(dets.shape[0])]) + [{'image_id': index, + 'category_id': cat_id, + 'bbox': [xs[k], ys[k], ws[k], hs[k]], + 'score': scores[k]} for k in xrange(dets.shape[0])]) + return results def _write_coco_results_file(self, all_boxes, res_file): @@ -358,14 +403,21 @@ def _write_coco_results_file(self, all_boxes, res_file): # "score": 0.236}, ...] results = [] for cls_ind, cls in enumerate(self.classes): + if cls == '__background__': continue - print 'Collecting {} results ({:d}/{:d})'.format(cls, cls_ind, - self.num_classes - 1) + + print('Collecting {} results ({:d}/{:d})'.format( + cls, cls_ind, self.num_classes - 1) + ) + coco_cat_id = self._class_to_coco_cat_id[cls] - results.extend(self._coco_results_one_category(all_boxes[cls_ind], - coco_cat_id)) - print 'Writing results json to {}'.format(res_file) + results.extend(self._coco_results_one_category( + all_boxes[cls_ind], coco_cat_id) + ) + + print('Writing results json to {}'.format(res_file)) + with open(res_file, 'w') as fid: json.dump(results, fid) @@ -374,13 +426,17 @@ def evaluate_detections(self, all_boxes, output_dir): self._image_set + self._year + '_results')) + if self.config['use_salt']: res_file += '_{}'.format(str(uuid.uuid4())) + res_file += '.json' self._write_coco_results_file(all_boxes, res_file) + # Only do evaluation on non-test sets if self._image_set.find('test') == -1: self._do_detection_eval(res_file, output_dir) + # Optionally cleanup results json file if self.config['cleanup']: os.remove(res_file) diff --git a/lib/datasets/ds_utils.py b/lib/datasets/ds_utils.py index f66a7f687..a92189fe8 100644 --- a/lib/datasets/ds_utils.py +++ b/lib/datasets/ds_utils.py @@ -3,24 +3,28 @@ # Licensed under The MIT License [see LICENSE for details] # Written by Ross Girshick # -------------------------------------------------------- - import numpy as np + def unique_boxes(boxes, scale=1.0): """Return indices of unique boxes.""" v = np.array([1, 1e3, 1e6, 1e9]) hashes = np.round(boxes * scale).dot(v) _, index = np.unique(hashes, return_index=True) + return np.sort(index) + def xywh_to_xyxy(boxes): """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) + def xyxy_to_xywh(boxes): """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) + def validate_boxes(boxes, width=0, height=0): """Check that a set of boxes are valid.""" x1 = boxes[:, 0] @@ -34,8 +38,10 @@ def validate_boxes(boxes, width=0, height=0): assert (x2 < width).all() assert (y2 < height).all() + def filter_small_boxes(boxes, min_size): w = boxes[:, 2] - boxes[:, 0] h = boxes[:, 3] - boxes[:, 1] keep = np.where((w >= min_size) & (h > min_size))[0] + return keep diff --git a/lib/datasets/factory.py b/lib/datasets/factory.py index 8c3fdb898..77f721089 100644 --- a/lib/datasets/factory.py +++ b/lib/datasets/factory.py @@ -4,15 +4,16 @@ # Licensed under The MIT License [see LICENSE for details] # Written by Ross Girshick # 
-------------------------------------------------------- - -"""Factory method for easily getting imdbs by name.""" - -__sets = {} - from datasets.pascal_voc import pascal_voc from datasets.coco import coco import numpy as np + +""" +Factory method for easily getting imdbs by name. +""" +__sets = {} + # Set up voc__ using selective search "fast" mode for year in ['2007', '2012']: for split in ['train', 'val', 'trainval', 'test']: @@ -31,12 +32,14 @@ name = 'coco_{}_{}'.format(year, split) __sets[name] = (lambda split=split, year=year: coco(split, year)) + def get_imdb(name): """Get an imdb (image database) by name.""" - if not __sets.has_key(name): + if name not in __sets: raise KeyError('Unknown dataset: {}'.format(name)) return __sets[name]() + def list_imdbs(): """List all registered imdbs.""" return __sets.keys() diff --git a/lib/datasets/imdb.py b/lib/datasets/imdb.py index b56bf0a80..0f3acae41 100644 --- a/lib/datasets/imdb.py +++ b/lib/datasets/imdb.py @@ -4,7 +4,6 @@ # Licensed under The MIT License [see LICENSE for details] # Written by Ross Girshick # -------------------------------------------------------- - import os import os.path as osp import PIL @@ -13,6 +12,7 @@ import scipy.sparse from fast_rcnn.config import cfg + class imdb(object): """Image database.""" @@ -76,7 +76,7 @@ def cache_path(self): @property def num_images(self): - return len(self.image_index) + return len(self.image_index) def image_path_at(self, i): raise NotImplementedError @@ -96,24 +96,31 @@ def evaluate_detections(self, all_boxes, output_dir=None): raise NotImplementedError def _get_widths(self): - return [PIL.Image.open(self.image_path_at(i)).size[0] - for i in xrange(self.num_images)] + return [ + PIL.Image.open(self.image_path_at(i)).size[0] + for i in xrange(self.num_images) + ] def append_flipped_images(self): num_images = self.num_images widths = self._get_widths() + for i in xrange(num_images): boxes = self.roidb[i]['boxes'].copy() oldx1 = boxes[:, 0].copy() oldx2 = boxes[:, 2].copy() boxes[:, 0] = widths[i] - oldx2 - 1 boxes[:, 2] = widths[i] - oldx1 - 1 - assert (boxes[:, 2] >= boxes[:, 0]).all() - entry = {'boxes' : boxes, - 'gt_overlaps' : self.roidb[i]['gt_overlaps'], - 'gt_classes' : self.roidb[i]['gt_classes'], - 'flipped' : True} + + assert((boxes[:, 2] >= boxes[:, 0]).all()) + + entry = {'boxes': boxes, + 'gt_overlaps': self.roidb[i]['gt_overlaps'], + 'gt_classes': self.roidb[i]['gt_classes'], + 'flipped': True} + self.roidb.append(entry) + self._image_index = self._image_index * 2 def evaluate_recall(self, candidate_boxes=None, thresholds=None, @@ -129,21 +136,28 @@ def evaluate_recall(self, candidate_boxes=None, thresholds=None, """ # Record max overlap value for each gt box # Return vector of overlap values - areas = { 'all': 0, 'small': 1, 'medium': 2, 'large': 3, - '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7} - area_ranges = [ [0**2, 1e5**2], # all - [0**2, 32**2], # small - [32**2, 96**2], # medium - [96**2, 1e5**2], # large - [96**2, 128**2], # 96-128 - [128**2, 256**2], # 128-256 - [256**2, 512**2], # 256-512 - [512**2, 1e5**2], # 512-inf - ] - assert areas.has_key(area), 'unknown area range: {}'.format(area) + areas = { + 'all': 0, 'small': 1, 'medium': 2, 'large': 3, + '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7 + } + + area_ranges = [ + [0**2, 1e5**2], # all + [0**2, 32**2], # small + [32**2, 96**2], # medium + [96**2, 1e5**2], # large + [96**2, 128**2], # 96-128 + [128**2, 256**2], # 128-256 + [256**2, 512**2], # 256-512 + [512**2, 1e5**2] # 512-inf + ] + 
+ assert area in areas, 'unknown area range: {}'.format(area) + area_range = area_ranges[areas[area]] gt_overlaps = np.zeros(0) num_pos = 0 + for i in xrange(self.num_images): # Checking for max_overlaps == 1 avoids including crowd annotations # (...pretty hacky :/) @@ -164,8 +178,10 @@ def evaluate_recall(self, candidate_boxes=None, thresholds=None, boxes = self.roidb[i]['boxes'][non_gt_inds, :] else: boxes = candidate_boxes[i] + if boxes.shape[0] == 0: continue + if limit is not None and boxes.shape[0] > limit: boxes = boxes[:limit, :] @@ -207,8 +223,11 @@ def evaluate_recall(self, candidate_boxes=None, thresholds=None, 'gt_overlaps': gt_overlaps} def create_roidb_from_box_list(self, box_list, gt_roidb): assert len(box_list) == self.num_images, \ 'Number of boxes must match number of ground-truth images' + roidb = [] for i in xrange(self.num_images): boxes = box_list[i] @@ -227,11 +246,11 @@ def create_roidb_from_box_list(self, box_list, gt_roidb): overlaps = scipy.sparse.csr_matrix(overlaps) roidb.append({ - 'boxes' : boxes, - 'gt_classes' : np.zeros((num_boxes,), dtype=np.int32), - 'gt_overlaps' : overlaps, - 'flipped' : False, - 'seg_areas' : np.zeros((num_boxes,), dtype=np.float32), + 'boxes': boxes, + 'gt_classes': np.zeros((num_boxes,), dtype=np.int32), + 'gt_overlaps': overlaps, + 'flipped': False, + 'seg_areas': np.zeros((num_boxes,), dtype=np.float32), }) return roidb diff --git a/lib/datasets/pascal_voc.py b/lib/datasets/pascal_voc.py index b55f2f6b2..bfd7faa7c 100644 --- a/lib/datasets/pascal_voc.py +++ b/lib/datasets/pascal_voc.py @@ -4,7 +4,6 @@ # Licensed under The MIT License [see LICENSE for details] # Written by Ross Girshick # -------------------------------------------------------- - import os from datasets.imdb import imdb import datasets.ds_utils as ds_utils @@ -19,15 +18,20 @@ from voc_eval import voc_eval from fast_rcnn.config import cfg + class pascal_voc(imdb): def __init__(self, image_set, year, devkit_path=None): imdb.__init__(self, 'voc_' + year + '_' + image_set) self._year = year self._image_set = image_set - self._devkit_path = self._get_default_path() if devkit_path is None \ - else devkit_path + + if devkit_path is None: + self._devkit_path = self._get_default_path() + else: + self._devkit_path = devkit_path + self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year) - self._classes = ('__background__', # always index 0 + self._classes = ('__background__', # always index 0 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', @@ -42,17 +46,22 @@ def __init__(self, image_set, year, devkit_path=None): self._comp_id = 'comp4' # PASCAL specific config options - self.config = {'cleanup' : True, - 'use_salt' : True, - 'use_diff' : False, - 'matlab_eval' : False, - 'rpn_file' : None, - 'min_size' : 2} - - assert os.path.exists(self._devkit_path), \ - 'VOCdevkit path does not exist: {}'.format(self._devkit_path) - assert os.path.exists(self._data_path), \ - 'Path does not exist: {}'.format(self._data_path) + self.config = {'cleanup': True, + 'use_salt': True, + 'use_diff': False, + 'matlab_eval': False, + 'rpn_file': None, + 'min_size': 2} + + assert os.path.exists(self._devkit_path), \ + 'VOCdevkit path does not exist: {}'.format(self._devkit_path) + + assert os.path.exists(self._data_path), \ + 'Path does not exist: {}'.format(self._data_path) def 
image_path_at(self, i): """ @@ -66,8 +75,11 @@ def image_path_from_index(self, index): """ image_path = os.path.join(self._data_path, 'JPEGImages', index + self._image_ext) assert os.path.exists(image_path), \ 'Path does not exist: {}'.format(image_path) + return image_path def _load_image_set_index(self): @@ -78,10 +90,14 @@ def _load_image_set_index(self): # self._devkit_path + /VOCdevkit2007/VOC2007/ImageSets/Main/val.txt image_set_file = os.path.join(self._data_path, 'ImageSets', 'Main', self._image_set + '.txt') assert os.path.exists(image_set_file), \ 'Path does not exist: {}'.format(image_set_file) + with open(image_set_file) as f: image_index = [x.strip() for x in f.readlines()] + return image_index def _get_default_path(self): @@ -100,14 +116,17 @@ def gt_roidb(self): if os.path.exists(cache_file): with open(cache_file, 'rb') as fid: roidb = cPickle.load(fid) - print '{} gt roidb loaded from {}'.format(self.name, cache_file) + + print('{} gt roidb loaded from {}'.format(self.name, cache_file)) + return roidb gt_roidb = [self._load_pascal_annotation(index) for index in self.image_index] with open(cache_file, 'wb') as fid: cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL) - print 'wrote gt roidb to {}'.format(cache_file) + + print('wrote gt roidb to {}'.format(cache_file)) return gt_roidb @@ -118,13 +137,17 @@ def selective_search_roidb(self): This function loads/saves from/to a cache file to speed up future calls. """ - cache_file = os.path.join(self.cache_path, - self.name + '_selective_search_roidb.pkl') + cache_file = os.path.join( + self.cache_path, + self.name + '_selective_search_roidb.pkl' + ) if os.path.exists(cache_file): with open(cache_file, 'rb') as fid: roidb = cPickle.load(fid) - print '{} ss roidb loaded from {}'.format(self.name, cache_file) + + print('{} ss roidb loaded from {}'.format(self.name, cache_file)) + return roidb if int(self._year) == 2007 or self._image_set != 'test': gt_roidb = self.gt_roidb() ss_roidb = self._load_selective_search_roidb(gt_roidb) roidb = imdb.merge_roidbs(gt_roidb, ss_roidb) else: roidb = self._load_selective_search_roidb(None) + with open(cache_file, 'wb') as fid: cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL) - print 'wrote ss roidb to {}'.format(cache_file) + + print('wrote ss roidb to {}'.format(cache_file)) return roidb @@ -151,19 +176,27 @@ def rpn_roidb(self): def _load_rpn_roidb(self, gt_roidb): filename = self.config['rpn_file'] - print 'loading {}'.format(filename) + + print('loading {}'.format(filename)) assert os.path.exists(filename), \ 'rpn data not found at: {}'.format(filename) + with open(filename, 'rb') as f: box_list = cPickle.load(f) + return self.create_roidb_from_box_list(box_list, gt_roidb) def _load_selective_search_roidb(self, gt_roidb): filename = os.path.abspath(os.path.join(cfg.DATA_DIR, 'selective_search_data', self.name + '.mat')) assert os.path.exists(filename), \ 'Selective search data not found at: {}'.format(filename) + raw_data = sio.loadmat(filename)['boxes'].ravel() box_list = [] @@ -185,6 +218,7 @@ def _load_pascal_annotation(self, index): filename = os.path.join(self._data_path, 'Annotations', index + '.xml') tree = 
ET.parse(filename) objs = tree.findall('object') + if not self.config['use_diff']: # Exclude the samples labeled as difficult non_diff_objs = [ @@ -193,6 +227,7 @@ def _load_pascal_annotation(self, index): # print 'Removed {} difficult objects'.format( # len(objs) - len(non_diff_objs)) objs = non_diff_objs + num_objs = len(objs) boxes = np.zeros((num_objs, 4), dtype=np.uint16) @@ -217,15 +252,19 @@ def _load_pascal_annotation(self, index): overlaps = scipy.sparse.csr_matrix(overlaps) - return {'boxes' : boxes, + return {'boxes': boxes, 'gt_classes': gt_classes, - 'gt_overlaps' : overlaps, - 'flipped' : False, - 'seg_areas' : seg_areas} + 'gt_overlaps': overlaps, + 'flipped': False, + 'seg_areas': seg_areas} def _get_comp_id(self): - comp_id = (self._comp_id + '_' + self._salt if self.config['use_salt'] - else self._comp_id) + + if self.config['use_salt']: + comp_id = self._comp_id + '_' + self._salt + else: + comp_id = self._comp_id + return comp_id def _get_voc_results_file_template(self): @@ -237,19 +276,26 @@ def _get_voc_results_file_template(self): 'VOC' + self._year, 'Main', filename) + return path def _write_voc_results_file(self, all_boxes): for cls_ind, cls in enumerate(self.classes): + if cls == '__background__': continue - print 'Writing {} VOC results file'.format(cls) + + print('Writing {} VOC results file'.format(cls)) + filename = self._get_voc_results_file_template().format(cls) + with open(filename, 'wt') as f: for im_ind, index in enumerate(self.image_index): dets = all_boxes[cls_ind][im_ind] + if dets == []: continue + # the VOCdevkit expects 1-based indices for k in xrange(dets.shape[0]): f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. @@ -257,41 +303,51 @@ def _write_voc_results_file(self, all_boxes): dets[k, 0] + 1, dets[k, 1] + 1, dets[k, 2] + 1, dets[k, 3] + 1)) - def _do_python_eval(self, output_dir = 'output'): + def _do_python_eval(self, output_dir='output'): annopath = os.path.join( self._devkit_path, 'VOC' + self._year, 'Annotations', '{:s}.xml') + imagesetfile = os.path.join( self._devkit_path, 'VOC' + self._year, 'ImageSets', 'Main', self._image_set + '.txt') + cachedir = os.path.join(self._devkit_path, 'annotations_cache') aps = [] # The PASCAL VOC metric changed in 2010 use_07_metric = True if int(self._year) < 2010 else False - print 'VOC07 metric? ' + ('Yes' if use_07_metric else 'No') + + print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No')) + if not os.path.isdir(output_dir): os.mkdir(output_dir) + for i, cls in enumerate(self._classes): if cls == '__background__': continue + filename = self._get_voc_results_file_template().format(cls) rec, prec, ap = voc_eval( filename, annopath, imagesetfile, cls, cachedir, ovthresh=0.5, use_07_metric=use_07_metric) aps += [ap] print('AP for {} = {:.4f}'.format(cls, ap)) + with open(os.path.join(output_dir, cls + '_pr.pkl'), 'w') as f: cPickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f) + print('Mean AP = {:.4f}'.format(np.mean(aps))) print('~~~~~~~~') print('Results:') + for ap in aps: print('{:.3f}'.format(ap)) + print('{:.3f}'.format(np.mean(aps))) print('~~~~~~~~') print('') @@ -303,9 +359,10 @@ def _do_python_eval(self, output_dir = 'output'): print('--------------------------------------------------------------') def _do_matlab_eval(self, output_dir='output'): - print '-----------------------------------------------------' - print 'Computing results with the official MATLAB eval code.' 
- print '-----------------------------------------------------' + print('-----------------------------------------------------') + print('Computing results with the official MATLAB eval code.') + print('-----------------------------------------------------') + path = os.path.join(cfg.ROOT_DIR, 'lib', 'datasets', 'VOCdevkit-matlab-wrapper') cmd = 'cd {} && '.format(path) @@ -314,18 +371,22 @@ def _do_matlab_eval(self, output_dir='output'): cmd += 'voc_eval(\'{:s}\',\'{:s}\',\'{:s}\',\'{:s}\'); quit;"' \ .format(self._devkit_path, self._get_comp_id(), self._image_set, output_dir) + print('Running:\n{}'.format(cmd)) status = subprocess.call(cmd, shell=True) def evaluate_detections(self, all_boxes, output_dir): self._write_voc_results_file(all_boxes) self._do_python_eval(output_dir) + if self.config['matlab_eval']: self._do_matlab_eval(output_dir) + if self.config['cleanup']: for cls in self._classes: if cls == '__background__': continue + filename = self._get_voc_results_file_template().format(cls) os.remove(filename) @@ -341,4 +402,5 @@ def competition_mode(self, on): from datasets.pascal_voc import pascal_voc d = pascal_voc('trainval', '2007') res = d.roidb - from IPython import embed; embed() + from IPython import embed + embed() diff --git a/lib/datasets/tools/mcg_munge.py b/lib/datasets/tools/mcg_munge.py index 1392aa308..69c272857 100644 --- a/lib/datasets/tools/mcg_munge.py +++ b/lib/datasets/tools/mcg_munge.py @@ -1,7 +1,8 @@ import os import sys -"""Hacky tool to convert file system layout of MCG boxes downloaded from +""" +Hacky tool to convert file system layout of MCG boxes downloaded from http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/ so that it's consistent with those computed by Jan Hosang (see: http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal- @@ -12,6 +13,7 @@ Boxes from Hosang et al. are in (x1, y1, x2, y2) order. """ + def munge(src_dir): # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat # want: ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat @@ -24,15 +26,17 @@ def munge(src_dir): first = base[:14] second = base[:22] dst_dir = os.path.join('MCG', 'mat', first, second) + if not os.path.exists(dst_dir): os.makedirs(dst_dir) + src = os.path.join(src_dir, fn) dst = os.path.join(dst_dir, fn) - print 'MV: {} -> {}'.format(src, dst) + print('MV: {} -> {}'.format(src, dst)) os.rename(src, dst) if __name__ == '__main__': # src_dir should look something like: - # src_dir = 'MCG-COCO-val2014-boxes' + # src_dir = 'MCG-COCO-val2014-boxes' src_dir = sys.argv[1] munge(src_dir) diff --git a/lib/datasets/voc_eval.py b/lib/datasets/voc_eval.py index 8d0a83076..2c9c230b4 100644 --- a/lib/datasets/voc_eval.py +++ b/lib/datasets/voc_eval.py @@ -9,6 +9,7 @@ import cPickle import numpy as np + def parse_rec(filename): """ Parse a PASCAL VOC xml file """ tree = ET.parse(filename) @@ -28,6 +29,7 @@ def parse_rec(filename): return objects + def voc_ap(rec, prec, use_07_metric=False): """ ap = voc_ap(rec, prec, [use_07_metric]) Compute VOC AP given precision and recall. 
@@ -59,8 +61,10 @@ def voc_ap(rec, prec, use_07_metric=False): # and sum (\Delta recall) * prec ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) + return ap + def voc_eval(detpath, annopath, imagesetfile, diff --git a/lib/fast_rcnn/bbox_transform.py b/lib/fast_rcnn/bbox_transform.py index c02916305..38aaae5aa 100644 --- a/lib/fast_rcnn/bbox_transform.py +++ b/lib/fast_rcnn/bbox_transform.py @@ -4,9 +4,9 @@ # Licensed under The MIT License [see LICENSE for details] # Written by Ross Girshick # -------------------------------------------------------- - import numpy as np + def bbox_transform(ex_rois, gt_rois): ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 @@ -27,6 +27,7 @@ def bbox_transform(ex_rois, gt_rois): (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() return targets + def bbox_transform_inv(boxes, deltas): if boxes.shape[0] == 0: return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) @@ -60,6 +61,7 @@ def bbox_transform_inv(boxes, deltas): return pred_boxes + def clip_boxes(boxes, im_shape): """ Clip boxes to image boundaries. @@ -73,4 +75,5 @@ def clip_boxes(boxes, im_shape): boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) # y2 < im_shape[0] boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) + return boxes diff --git a/lib/fast_rcnn/config.py b/lib/fast_rcnn/config.py index 253017369..4ac1ba45e 100644 --- a/lib/fast_rcnn/config.py +++ b/lib/fast_rcnn/config.py @@ -1,3 +1,9 @@ +import os +import os.path as osp +import numpy as np +# `pip install easydict` if you don't have it +from easydict import EasyDict as edict + # -------------------------------------------------------- # Fast R-CNN # Copyright (c) 2015 Microsoft @@ -5,7 +11,8 @@ # Written by Ross Girshick # -------------------------------------------------------- -"""Fast R-CNN config system. +""" +Fast R-CNN config system. This file specifies default config options for Fast R-CNN. You should not change values in this file. Instead, you should write a config file (in yaml) @@ -16,11 +23,6 @@ - See experiments/cfgs/*.yml for example YAML config override files """ -import os -import os.path as osp -import numpy as np -# `pip install easydict` if you don't have it -from easydict import EasyDict as edict __C = edict() # Consumers can get config by: @@ -154,11 +156,11 @@ # Test using these proposals __C.TEST.PROPOSAL_METHOD = 'selective_search' -## NMS threshold used on RPN proposals +# NMS threshold used on RPN proposals __C.TEST.RPN_NMS_THRESH = 0.7 -## Number of top scoring boxes to keep before apply NMS to RPN proposals +# Number of top scoring boxes to keep before applying NMS to RPN proposals __C.TEST.RPN_PRE_NMS_TOP_N = 6000 -## Number of top scoring boxes to keep after applying NMS to RPN proposals +# Number of top scoring boxes to keep after applying NMS to RPN proposals __C.TEST.RPN_POST_NMS_TOP_N = 300 # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale) __C.TEST.RPN_MIN_SIZE = 16 @@ -218,10 +220,13 @@ def get_output_dir(imdb, net=None): outdir = osp.abspath(osp.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name)) if net is not None: outdir = osp.join(outdir, net.name) + if not os.path.exists(outdir): os.makedirs(outdir) + return outdir + def _merge_a_into_b(a, b): """Merge config dictionary a into config dictionary b, clobbering the options in b whenever they are also specified in a. 
@@ -231,7 +236,7 @@ for k, v in a.iteritems(): # a must specify keys that are in b - if not b.has_key(k): + if k not in b: raise KeyError('{} is not a valid config key'.format(k)) # the types must match, too @@ -240,9 +245,13 @@ if isinstance(b[k], np.ndarray): v = np.array(v, dtype=b[k].dtype) else: - raise ValueError(('Type mismatch ({} vs. {}) ' - 'for config key: {}').format(type(b[k]), - type(v), k)) + raise ValueError( + 'Type mismatch ({} vs. {}) for config key: {}'.format( + type(b[k]), + type(v), + k + ) + ) # recursively merge dicts if type(v) is edict: @@ -254,6 +263,7 @@ else: b[k] = v + def cfg_from_file(filename): """Load a config file and merge it into the default options.""" import yaml @@ -262,24 +272,36 @@ _merge_a_into_b(yaml_cfg, __C) + def cfg_from_list(cfg_list): """Set config keys via list (e.g., from command line).""" from ast import literal_eval - assert len(cfg_list) % 2 == 0 + + assert(len(cfg_list) % 2 == 0) + for k, v in zip(cfg_list[0::2], cfg_list[1::2]): key_list = k.split('.') d = __C + for subkey in key_list[:-1]: - assert d.has_key(subkey) + assert(subkey in d) + d = d[subkey] + subkey = key_list[-1] - assert d.has_key(subkey) + + assert(subkey in d) + try: value = literal_eval(v) except: # handle the case when v is a string literal value = v assert type(value) == type(d[subkey]), \ 'type {} does not match original type {}'.format( - type(value), type(d[subkey])) + type(value), + type(d[subkey]) + ) d[subkey] = value diff --git a/lib/fast_rcnn/nms_wrapper.py b/lib/fast_rcnn/nms_wrapper.py index d1a11db91..c421637ff 100644 --- a/lib/fast_rcnn/nms_wrapper.py +++ b/lib/fast_rcnn/nms_wrapper.py @@ -4,16 +4,17 @@ # Licensed under The MIT License [see LICENSE for details] # Written by Ross Girshick # -------------------------------------------------------- - from fast_rcnn.config import cfg from nms.gpu_nms import gpu_nms from nms.cpu_nms import cpu_nms + def nms(dets, thresh, force_cpu=False): """Dispatch to either CPU or GPU NMS implementations.""" if dets.shape[0] == 0: return [] + if cfg.USE_GPU_NMS and not force_cpu: return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) else: diff --git a/lib/fast_rcnn/test.py b/lib/fast_rcnn/test.py index f889d0977..bd3fe4239 100644 --- a/lib/fast_rcnn/test.py +++ b/lib/fast_rcnn/test.py @@ -19,6 +19,7 @@ from utils.blob import im_list_to_blob import os + def _get_image_blob(im): """Converts an image into a network input. @@ -45,8 +46,14 @@ def _get_image_blob(im): # Prevent the biggest axis from being more than MAX_SIZE if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) - im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, - interpolation=cv2.INTER_LINEAR) + + im = cv2.resize( + im_orig, + None, None, + fx=im_scale, fy=im_scale, + interpolation=cv2.INTER_LINEAR + ) + im_scale_factors.append(im_scale) processed_ims.append(im) @@ -55,8 +62,10 @@ def _get_image_blob(im): return blob, np.array(im_scale_factors) + def _get_rois_blob(im_rois, im_scale_factors): - """Converts RoIs into network inputs. + """ + Converts RoIs into network inputs. 
Arguments: im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates @@ -67,8 +76,10 @@ """ rois, levels = _project_im_rois(im_rois, im_scale_factors) rois_blob = np.hstack((levels, rois)) + return rois_blob.astype(np.float32, copy=False) + def _project_im_rois(im_rois, scales): """Project image RoIs into the image pyramid built by _get_image_blob. @@ -97,14 +108,18 @@ def _project_im_rois(im_rois, scales): return rois, levels + def _get_blobs(im, rois): """Convert an image and RoIs within that image into network inputs.""" - blobs = {'data' : None, 'rois' : None} + blobs = {'data': None, 'rois': None} blobs['data'], im_scale_factors = _get_image_blob(im) + if not cfg.TEST.HAS_RPN: blobs['rois'] = _get_rois_blob(rois, im_scale_factors) + return blobs, im_scale_factors + def im_detect(net, im, boxes=None): """Detect object classes in an image given object proposals. @@ -127,16 +142,21 @@ def im_detect(net, im, boxes=None): if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN: v = np.array([1, 1e3, 1e6, 1e9, 1e12]) hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v) - _, index, inv_index = np.unique(hashes, return_index=True, - return_inverse=True) + + _, index, inv_index = np.unique( + hashes, return_index=True, return_inverse=True + ) + blobs['rois'] = blobs['rois'][index, :] boxes = boxes[index, :] if cfg.TEST.HAS_RPN: im_blob = blobs['data'] + blobs['im_info'] = np.array( [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]], - dtype=np.float32) + dtype=np.float32 + ) # reshape network inputs net.blobs['data'].reshape(*(blobs['data'].shape)) @@ -147,14 +167,18 @@ def im_detect(net, im, boxes=None): # do forward forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)} + if cfg.TEST.HAS_RPN: - forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False) + forward_kwargs['im_info'] = blobs['im_info'].astype( + np.float32, copy=False + ) else: forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False) + blobs_out = net.forward(**forward_kwargs) if cfg.TEST.HAS_RPN: assert len(im_scales) == 1, "Only single-image batch implemented" rois = net.blobs['rois'].data.copy() # unscale back to raw image space boxes = rois[:, 1:5] / im_scales[0] @@ -183,13 +207,16 @@ def im_detect(net, im, boxes=None): return scores, pred_boxes + def vis_detections(im, class_name, dets, thresh=0.3): """Visual debugging of detections.""" import matplotlib.pyplot as plt + im = im[:, :, (2, 1, 0)] for i in xrange(np.minimum(10, dets.shape[0])): bbox = dets[i, :4] score = dets[i, -1] + if score > thresh: plt.cla() plt.imshow(im) @@ -202,28 +229,36 @@ def vis_detections(im, class_name, dets, thresh=0.3): plt.title('{} {:.3f}'.format(class_name, score)) plt.show() + def apply_nms(all_boxes, thresh): - """Apply non-maximum suppression to all predicted boxes output by the + """ + Apply non-maximum suppression to all predicted boxes output by the test_net method. 
""" num_classes = len(all_boxes) num_images = len(all_boxes[0]) nms_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(num_classes)] + for cls_ind in xrange(num_classes): for im_ind in xrange(num_images): dets = all_boxes[cls_ind][im_ind] + if dets == []: continue # CPU NMS is much faster than GPU NMS when the number of boxes # is relative small (e.g., < 10k) # TODO(rbg): autotune NMS dispatch keep = nms(dets, thresh, force_cpu=True) + if len(keep) == 0: continue + nms_boxes[cls_ind][im_ind] = dets[keep, :].copy() + return nms_boxes + def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) @@ -283,13 +318,19 @@ def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False): all_boxes[j][i] = all_boxes[j][i][keep, :] _t['misc'].toc() - print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ - .format(i + 1, num_images, _t['im_detect'].average_time, - _t['misc'].average_time) + print( + 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format( + i + 1, + num_images, + _t['im_detect'].average_time, + _t['misc'].average_time + ) + ) det_file = os.path.join(output_dir, 'detections.pkl') + with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) - print 'Evaluating detections' + print('Evaluating detections') imdb.evaluate_detections(all_boxes, output_dir) diff --git a/lib/fast_rcnn/train.py b/lib/fast_rcnn/train.py index 05bd594ff..cb6f7111d 100644 --- a/lib/fast_rcnn/train.py +++ b/lib/fast_rcnn/train.py @@ -13,10 +13,10 @@ from utils.timer import Timer import numpy as np import os - from caffe.proto import caffe_pb2 import google.protobuf as pb2 + class SolverWrapper(object): """A simple wrapper around Caffe's solver. This wrapper gives us control over he snapshotting process, which we @@ -28,22 +28,22 @@ def __init__(self, solver_prototxt, roidb, output_dir, """Initialize the SolverWrapper.""" self.output_dir = output_dir - if (cfg.TRAIN.HAS_RPN and cfg.TRAIN.BBOX_REG and - cfg.TRAIN.BBOX_NORMALIZE_TARGETS): + if (cfg.TRAIN.HAS_RPN and cfg.TRAIN.BBOX_REG and cfg.TRAIN.BBOX_NORMALIZE_TARGETS): # RPN can only use precomputed normalization because there are no # fixed statistics to compute a priori - assert cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED + assert(cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED) if cfg.TRAIN.BBOX_REG: - print 'Computing bounding-box regression targets...' 
- self.bbox_means, self.bbox_stds = \ - rdl_roidb.add_bbox_regression_targets(roidb) - print 'done' + print('Computing bounding-box regression targets...') + self.bbox_means, self.bbox_stds = rdl_roidb.add_bbox_regression_targets(roidb) + print('done') self.solver = caffe.SGDSolver(solver_prototxt) if pretrained_model is not None: - print ('Loading pretrained model ' - 'weights from {:s}').format(pretrained_model) + print('Loading pretrained model weights from {:s}'.format( + pretrained_model + ) + ) self.solver.net.copy_from(pretrained_model) self.solver_param = caffe_pb2.SolverParameter() @@ -58,9 +58,11 @@ def snapshot(self): """ net = self.solver.net - scale_bbox_params = (cfg.TRAIN.BBOX_REG and - cfg.TRAIN.BBOX_NORMALIZE_TARGETS and - net.params.has_key('bbox_pred')) + scale_bbox_params = ( + cfg.TRAIN.BBOX_REG and + cfg.TRAIN.BBOX_NORMALIZE_TARGETS and + 'bbox_pred' in net.params + ) if scale_bbox_params: # save original values @@ -68,26 +70,34 @@ def snapshot(self): orig_1 = net.params['bbox_pred'][1].data.copy() # scale and shift with bbox reg unnormalization; then save snapshot - net.params['bbox_pred'][0].data[...] = \ - (net.params['bbox_pred'][0].data * - self.bbox_stds[:, np.newaxis]) - net.params['bbox_pred'][1].data[...] = \ - (net.params['bbox_pred'][1].data * - self.bbox_stds + self.bbox_means) + net.params['bbox_pred'][0].data[...] = ( + net.params['bbox_pred'][0].data * self.bbox_stds[:, np.newaxis] + ) + + net.params['bbox_pred'][1].data[...] = ( + net.params['bbox_pred'][1].data * self.bbox_stds + self.bbox_means + ) infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX if cfg.TRAIN.SNAPSHOT_INFIX != '' else '') - filename = (self.solver_param.snapshot_prefix + infix + - '_iter_{:d}'.format(self.solver.iter) + '.caffemodel') + + filename = ( + self.solver_param.snapshot_prefix + infix + '_iter_{:d}'.format( + self.solver.iter + ) + '.caffemodel' + ) + filename = os.path.join(self.output_dir, filename) net.save(str(filename)) - print 'Wrote snapshot to: {:s}'.format(filename) + + print('Wrote snapshot to: {:s}'.format(filename)) if scale_bbox_params: # restore net to original state net.params['bbox_pred'][0].data[...] = orig_0 net.params['bbox_pred'][1].data[...] = orig_1 + return filename def train_model(self, max_iters): @@ -95,13 +105,14 @@ def train_model(self, max_iters): last_snapshot_iter = -1 timer = Timer() model_paths = [] + while self.solver.iter < max_iters: # Make one SGD update timer.tic() self.solver.step(1) timer.toc() if self.solver.iter % (10 * self.solver_param.display) == 0: - print 'speed: {:.3f}s / iter'.format(timer.average_time) + print('speed: {:.3f}s / iter'.format(timer.average_time)) if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0: last_snapshot_iter = self.solver.iter @@ -109,21 +120,26 @@ def train_model(self, max_iters): if last_snapshot_iter != self.solver.iter: model_paths.append(self.snapshot()) + return model_paths + def get_training_roidb(imdb): - """Returns a roidb (Region of Interest database) for use in training.""" + """ + Returns a roidb (Region of Interest database) for use in training. + """ if cfg.TRAIN.USE_FLIPPED: - print 'Appending horizontally-flipped training examples...' + print('Appending horizontally-flipped training examples...') imdb.append_flipped_images() - print 'done' + print('done') - print 'Preparing training data...' 
+ print('Preparing training data...') rdl_roidb.prepare_roidb(imdb) - print 'done' + print('done') return imdb.roidb + def filter_roidb(roidb): """Remove roidb entries that have no usable RoIs.""" @@ -139,24 +155,34 @@ def is_valid(entry): (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] # image is only valid if such boxes exist valid = len(fg_inds) > 0 or len(bg_inds) > 0 + return valid num = len(roidb) filtered_roidb = [entry for entry in roidb if is_valid(entry)] num_after = len(filtered_roidb) - print 'Filtered {} roidb entries: {} -> {}'.format(num - num_after, - num, num_after) + + print('Filtered {} roidb entries: {} -> {}'.format( + num - num_after, + num, + num_after + ) + ) return filtered_roidb + def train_net(solver_prototxt, roidb, output_dir, pretrained_model=None, max_iters=40000): - """Train a Fast R-CNN network.""" + """ + Train a Fast R-CNN network. + """ roidb = filter_roidb(roidb) sw = SolverWrapper(solver_prototxt, roidb, output_dir, pretrained_model=pretrained_model) - print 'Solving...' + print('Solving...') model_paths = sw.train_model(max_iters) - print 'done solving' + print('done solving') + return model_paths diff --git a/lib/roi_data_layer/layer.py b/lib/roi_data_layer/layer.py index 9f145fea8..95cc9ccfd 100644 --- a/lib/roi_data_layer/layer.py +++ b/lib/roi_data_layer/layer.py @@ -5,7 +5,8 @@ # Written by Ross Girshick # -------------------------------------------------------- -"""The data layer used during training to train a Fast R-CNN network. +""" +The data layer used during training to train a Fast R-CNN network. RoIDataLayer implements a Caffe Python layer. """ @@ -17,6 +18,7 @@ import yaml from multiprocessing import Process, Queue + class RoIDataLayer(caffe.Layer): """Fast R-CNN data layer used for training.""" @@ -38,6 +40,7 @@ def _shuffle_roidb_inds(self): self._perm = inds else: self._perm = np.random.permutation(np.arange(len(self._roidb))) + self._cur = 0 def _get_next_minibatch_inds(self): @@ -47,6 +50,7 @@ def _get_next_minibatch_inds(self): db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] self._cur += cfg.TRAIN.IMS_PER_BATCH + return db_inds def _get_next_minibatch(self): @@ -60,6 +64,7 @@ def _get_next_minibatch(self): else: db_inds = self._get_next_minibatch_inds() minibatch_db = [self._roidb[i] for i in db_inds] + return get_minibatch(minibatch_db, self._num_classes) def set_roidb(self, roidb): @@ -72,11 +77,13 @@ def set_roidb(self, roidb): self._roidb, self._num_classes) self._prefetch_process.start() + # Terminate the child process when the parent exists def cleanup(): - print 'Terminating BlobFetcher' + print('Terminating BlobFetcher') self._prefetch_process.terminate() self._prefetch_process.join() + import atexit atexit.register(cleanup) @@ -92,8 +99,11 @@ def setup(self, bottom, top): # data blob: holds a batch of N images, each with 3 channels idx = 0 - top[idx].reshape(cfg.TRAIN.IMS_PER_BATCH, 3, - max(cfg.TRAIN.SCALES), cfg.TRAIN.MAX_SIZE) + top[idx].reshape( + cfg.TRAIN.IMS_PER_BATCH, 3, + max(cfg.TRAIN.SCALES), cfg.TRAIN.MAX_SIZE + ) + self._name_to_top_map['data'] = idx idx += 1 @@ -105,7 +115,8 @@ def setup(self, bottom, top): top[idx].reshape(1, 4) self._name_to_top_map['gt_boxes'] = idx idx += 1 - else: # not using RPN + else: + # not using RPN # rois blob: holds R regions of interest, each is a 5-tuple # (n, x1, y1, x2, y2) specifying an image batch index n and a # rectangle (x1, y1, x2, y2) @@ -136,8 +147,8 @@ def setup(self, bottom, top): self._name_to_top_map['bbox_outside_weights'] = idx idx += 1 - print 
'RoiDataLayer: name_to_top:', self._name_to_top_map - assert len(top) == len(self._name_to_top_map) + print('RoiDataLayer: name_to_top:', self._name_to_top_map) + assert(len(top) == len(self._name_to_top_map)) def forward(self, bottom, top): """Get blobs and copy them into this layer's top blob vector.""" @@ -158,6 +169,7 @@ def reshape(self, bottom, top): """Reshaping happens during the call to forward.""" pass + class BlobFetcher(Process): """Experimental class for prefetching blobs in a separate process.""" def __init__(self, queue, roidb, num_classes): @@ -188,7 +200,7 @@ def _get_next_minibatch_inds(self): return db_inds def run(self): - print 'BlobFetcher started' + print('BlobFetcher started') while True: db_inds = self._get_next_minibatch_inds() minibatch_db = [self._roidb[i] for i in db_inds] diff --git a/lib/roi_data_layer/minibatch.py b/lib/roi_data_layer/minibatch.py index f4535b022..2f3db2fa1 100644 --- a/lib/roi_data_layer/minibatch.py +++ b/lib/roi_data_layer/minibatch.py @@ -13,15 +13,23 @@ from fast_rcnn.config import cfg from utils.blob import prep_im_for_blob, im_list_to_blob + def get_minibatch(roidb, num_classes): """Given a roidb, construct a minibatch sampled from it.""" num_images = len(roidb) # Sample random scales to use for each image in this batch - random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES), - size=num_images) - assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ - 'num_images ({}) must divide BATCH_SIZE ({})'. \ - format(num_images, cfg.TRAIN.BATCH_SIZE) + random_scale_inds = npr.randint( + 0, high=len(cfg.TRAIN.SCALES), + size=num_images + ) + + assert cfg.TRAIN.BATCH_SIZE % num_images == 0, \ + 'num_images ({}) must divide BATCH_SIZE ({})'.format( + num_images, cfg.TRAIN.BATCH_SIZE + ) + rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) @@ -31,8 +39,8 @@ def get_minibatch(roidb, num_classes): blobs = {'data': im_blob} if cfg.TRAIN.HAS_RPN: assert len(im_scales) == 1, "Single batch only" assert len(roidb) == 1, "Single batch only" # gt boxes: (x1, y1, x2, y2, cls) gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) @@ -42,12 +50,14 @@ def get_minibatch(roidb, num_classes): blobs['im_info'] = np.array( [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]], dtype=np.float32) - else: # not using RPN + else: + # not using RPN # Now, build the region of interest and label blobs rois_blob = np.zeros((0, 5), dtype=np.float32) labels_blob = np.zeros((0), dtype=np.float32) bbox_targets_blob = np.zeros((0, 4 * num_classes), dtype=np.float32) bbox_inside_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32) + # all_overlaps = [] for im_i in xrange(num_images): labels, overlaps, im_rois, bbox_targets, bbox_inside_weights \ @@ -75,13 +85,14 @@ def get_minibatch(roidb, num_classes): if cfg.TRAIN.BBOX_REG: blobs['bbox_targets'] = bbox_targets_blob blobs['bbox_inside_weights'] = bbox_inside_blob - blobs['bbox_outside_weights'] = \ - np.array(bbox_inside_blob > 0).astype(np.float32) + blobs['bbox_outside_weights'] = np.array(bbox_inside_blob > 0).astype(np.float32) return blobs + def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes): - """Generate a random sample of RoIs comprising foreground and background + """ + Generate a random sample of RoIs comprising foreground and background examples. 
""" # label = class RoI has max overlap with @@ -97,7 +108,8 @@ def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes): # Sample foreground regions without replacement if fg_inds.size > 0: fg_inds = npr.choice( - fg_inds, size=fg_rois_per_this_image, replace=False) + fg_inds, size=fg_rois_per_this_image, replace=False + ) # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & @@ -110,7 +122,8 @@ def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes): # Sample foreground regions without replacement if bg_inds.size > 0: bg_inds = npr.choice( - bg_inds, size=bg_rois_per_this_image, replace=False) + bg_inds, size=bg_rois_per_this_image, replace=False + ) # The indices that we're selecting (both fg and bg) keep_inds = np.append(fg_inds, bg_inds) @@ -122,12 +135,15 @@ def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes): rois = rois[keep_inds] bbox_targets, bbox_inside_weights = _get_bbox_regression_labels( - roidb['bbox_targets'][keep_inds, :], num_classes) + roidb['bbox_targets'][keep_inds, :], num_classes + ) return labels, overlaps, rois, bbox_targets, bbox_inside_weights + def _get_image_blob(roidb, scale_inds): - """Builds an input blob from the images in the roidb at the specified + """ + Builds an input blob from the images in the roidb at the specified scales. """ num_images = len(roidb) @@ -148,11 +164,15 @@ def _get_image_blob(roidb, scale_inds): return blob, im_scales + def _project_im_rois(im_rois, im_scale_factor): - """Project image RoIs into the rescaled training image.""" + """ + Project image RoIs into the rescaled training image. + """ rois = im_rois * im_scale_factor return rois + def _get_bbox_regression_labels(bbox_target_data, num_classes): """Bounding-box regression targets are stored in a compact form in the roidb. @@ -175,10 +195,14 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes): end = start + 4 bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS + return bbox_targets, bbox_inside_weights + def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps): - """Visualize a mini-batch for debugging.""" + """ + Visualize a mini-batch for debugging. + """ import matplotlib.pyplot as plt for i in xrange(rois_blob.shape[0]): rois = rois_blob[i, :] @@ -190,7 +214,7 @@ def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps): im = im.astype(np.uint8) cls = labels_blob[i] plt.imshow(im) - print 'class: ', cls, ' overlap: ', overlaps[i] + print('class: ', cls, ' overlap: ', overlaps[i]) plt.gca().add_patch( plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0], roi[3] - roi[1], fill=False, diff --git a/lib/roi_data_layer/roidb.py b/lib/roi_data_layer/roidb.py index 97a6a7612..a07ede114 100644 --- a/lib/roi_data_layer/roidb.py +++ b/lib/roi_data_layer/roidb.py @@ -5,7 +5,9 @@ # Written by Ross Girshick # -------------------------------------------------------- -"""Transform a roidb into a trainable roidb by adding a bunch of metadata.""" +""" +Transform a roidb into a trainable roidb by adding a bunch of metadata. +""" import numpy as np from fast_rcnn.config import cfg @@ -13,8 +15,10 @@ from utils.cython_bbox import bbox_overlaps import PIL + def prepare_roidb(imdb): - """Enrich the imdb's roidb by adding some derived quantities that + """ + Enrich the imdb's roidb by adding some derived quantities that are useful for training. 
This function precomputes the maximum
    overlap, taken over ground-truth boxes, between each ROI and each
    ground-truth box. The class with maximum overlap is also
@@ -38,15 +42,18 @@ def prepare_roidb(imdb):
         # sanity checks
         # max overlap of 0 => class should be zero (background)
         zero_inds = np.where(max_overlaps == 0)[0]
-        assert all(max_classes[zero_inds] == 0)
+        assert(all(max_classes[zero_inds] == 0))
         # max overlap > 0 => class should not be zero (must be a fg class)
         nonzero_inds = np.where(max_overlaps > 0)[0]
-        assert all(max_classes[nonzero_inds] != 0)
+        assert(all(max_classes[nonzero_inds] != 0))
+

 def add_bbox_regression_targets(roidb):
-    """Add information needed to train bounding-box regressors."""
-    assert len(roidb) > 0
+    """
+    Add information needed to train bounding-box regressors.
+    """
+    assert(len(roidb) > 0)
     assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?'

     num_images = len(roidb)
     # Infer number of classes from the number of columns in gt_overlaps
@@ -55,15 +62,18 @@ def add_bbox_regression_targets(roidb):
         rois = roidb[im_i]['boxes']
         max_overlaps = roidb[im_i]['max_overlaps']
         max_classes = roidb[im_i]['max_classes']
-        roidb[im_i]['bbox_targets'] = \
-            _compute_targets(rois, max_overlaps, max_classes)
+        roidb[im_i]['bbox_targets'] = _compute_targets(
+            rois, max_overlaps, max_classes
+        )

     if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
         # Use fixed / precomputed "means" and "stds" instead of empirical values
         means = np.tile(
-            np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1))
+            np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1)
+        )
         stds = np.tile(
-            np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1))
+            np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1)
+        )
     else:
         # Compute values needed for means and stds
         # var(x) = E(x^2) - E(x)^2
@@ -77,22 +87,21 @@ def add_bbox_regression_targets(roidb):
             if cls_inds.size > 0:
                 class_counts[cls] += cls_inds.size
                 sums[cls, :] += targets[cls_inds, 1:].sum(axis=0)
-                squared_sums[cls, :] += \
-                    (targets[cls_inds, 1:] ** 2).sum(axis=0)
+                squared_sums[cls, :] += (targets[cls_inds, 1:] ** 2).sum(axis=0)

         means = sums / class_counts
         stds = np.sqrt(squared_sums / class_counts - means ** 2)

-    print 'bbox target means:'
-    print means
-    print means[1:, :].mean(axis=0) # ignore bg class
-    print 'bbox target stdevs:'
-    print stds
-    print stds[1:, :].mean(axis=0) # ignore bg class
+    print('bbox target means:')
+    print(means)
+    print(means[1:, :].mean(axis=0))  # ignore bg class
+    print('bbox target stdevs:')
+    print(stds)
+    print(stds[1:, :].mean(axis=0))  # ignore bg class

     # Normalize targets
     if cfg.TRAIN.BBOX_NORMALIZE_TARGETS:
-        print "Normalizing targets"
+        print("Normalizing targets")
         for im_i in xrange(num_images):
             targets = roidb[im_i]['bbox_targets']
             for cls in xrange(1, num_classes):
@@ -100,12 +109,13 @@ def add_bbox_regression_targets(roidb):
                 roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :]
                 roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :]
     else:
-        print "NOT normalizing targets"
+        print("NOT normalizing targets")

     # These values will be needed for making predictions
     # (the predictions will need to be unnormalized and uncentered)
     return means.ravel(), stds.ravel()

+
 def _compute_targets(rois, overlaps, labels):
     """Compute bounding-box regression targets for an image."""
     # Indices of ground-truth ROIs
@@ -130,4 +140,5 @@ def _compute_targets(rois, overlaps, labels):
         targets = np.zeros((rois.shape[0], 5),
dtype=np.float32)
     targets[ex_inds, 0] = labels[ex_inds]
     targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
+
     return targets
diff --git a/lib/rpn/anchor_target_layer.py b/lib/rpn/anchor_target_layer.py
index 4563df1d2..e5463fb39 100644
--- a/lib/rpn/anchor_target_layer.py
+++ b/lib/rpn/anchor_target_layer.py
@@ -17,6 +17,7 @@

 DEBUG = False

+
 class AnchorTargetLayer(caffe.Layer):
     """
     Assign anchors to ground-truth targets. Produces anchor classification
@@ -31,13 +32,16 @@ def setup(self, bottom, top):
         self._feat_stride = layer_params['feat_stride']

         if DEBUG:
-            print 'anchors:'
-            print self._anchors
-            print 'anchor shapes:'
-            print np.hstack((
-                self._anchors[:, 2::4] - self._anchors[:, 0::4],
-                self._anchors[:, 3::4] - self._anchors[:, 1::4],
-            ))
+            print('anchors:')
+            print(self._anchors)
+            print('anchor shapes:')
+            print(
+                np.hstack((
+                    self._anchors[:, 2::4] - self._anchors[:, 0::4],
+                    self._anchors[:, 3::4] - self._anchors[:, 1::4],
+                ))
+            )
+
         self._counts = cfg.EPS
         self._sums = np.zeros((1, 4))
         self._squared_sums = np.zeros((1, 4))
@@ -50,7 +54,7 @@ def setup(self, bottom, top):
         height, width = bottom[0].data.shape[-2:]

         if DEBUG:
-            print 'AnchorTargetLayer: height', height, 'width', width
+            print('AnchorTargetLayer: height', height, 'width', width)

         A = self._num_anchors
         # labels
@@ -82,12 +88,12 @@ def forward(self, bottom, top):
         im_info = bottom[2].data[0, :]

         if DEBUG:
-            print ''
-            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
-            print 'scale: {}'.format(im_info[2])
-            print 'height, width: ({}, {})'.format(height, width)
-            print 'rpn: gt_boxes.shape', gt_boxes.shape
-            print 'rpn: gt_boxes', gt_boxes
+            print('')
+            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
+            print('scale: {}'.format(im_info[2]))
+            print('height, width: ({}, {})'.format(height, width))
+            print('rpn: gt_boxes.shape', gt_boxes.shape)
+            print('rpn: gt_boxes', gt_boxes)

         # 1.
Generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, width) * self._feat_stride @@ -115,13 +121,13 @@ def forward(self, bottom, top): )[0] if DEBUG: - print 'total_anchors', total_anchors - print 'inds_inside', len(inds_inside) + print('total_anchors', total_anchors) + print('inds_inside', len(inds_inside)) # keep only inside anchors anchors = all_anchors[inds_inside, :] if DEBUG: - print 'anchors.shape', anchors.shape + print('anchors.shape', anchors.shape) # label: 1 is positive, 0 is negative, -1 is dont care labels = np.empty((len(inds_inside), ), dtype=np.float32) @@ -168,8 +174,8 @@ def forward(self, bottom, top): disable_inds = npr.choice( bg_inds, size=(len(bg_inds) - num_bg), replace=False) labels[disable_inds] = -1 - #print "was %s inds, disabling %s, now %s inds" % ( - #len(bg_inds), len(disable_inds), np.sum(labels == 0)) + # print "was %s inds, disabling %s, now %s inds" % ( + # len(bg_inds), len(disable_inds), np.sum(labels == 0)) bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) @@ -184,12 +190,17 @@ def forward(self, bottom, top): positive_weights = np.ones((1, 4)) * 1.0 / num_examples negative_weights = np.ones((1, 4)) * 1.0 / num_examples else: - assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & - (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) - positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / - np.sum(labels == 1)) - negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / - np.sum(labels == 0)) + assert( + (cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & + (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1) + ) + positive_weights = ( + cfg.TRAIN.RPN_POSITIVE_WEIGHT / np.sum(labels == 1) + ) + negative_weights = ( + (1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / np.sum(labels == 0) + ) + bbox_outside_weights[labels == 1, :] = positive_weights bbox_outside_weights[labels == 0, :] = negative_weights @@ -199,26 +210,30 @@ def forward(self, bottom, top): self._counts += np.sum(labels == 1) means = self._sums / self._counts stds = np.sqrt(self._squared_sums / self._counts - means ** 2) - print 'means:' - print means - print 'stdevs:' - print stds + print('means:') + print(means) + print('stdevs:') + print(stds) # map up to original set of anchors labels = _unmap(labels, total_anchors, inds_inside, fill=-1) bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) - bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) - bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) + bbox_inside_weights = _unmap( + bbox_inside_weights, total_anchors, inds_inside, fill=0 + ) + bbox_outside_weights = _unmap( + bbox_outside_weights, total_anchors, inds_inside, fill=0 + ) if DEBUG: - print 'rpn: max max_overlap', np.max(max_overlaps) - print 'rpn: num_positive', np.sum(labels == 1) - print 'rpn: num_negative', np.sum(labels == 0) + print('rpn: max max_overlap', np.max(max_overlaps)) + print('rpn: num_positive', np.sum(labels == 1)) + print('rpn: num_negative', np.sum(labels == 0)) self._fg_sum += np.sum(labels == 1) self._bg_sum += np.sum(labels == 0) self._count += 1 - print 'rpn: num_positive avg', self._fg_sum / self._count - print 'rpn: num_negative avg', self._bg_sum / self._count + print('rpn: num_positive avg', self._fg_sum / self._count) + print('rpn: num_negative avg', self._bg_sum / self._count) # labels labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) @@ -227,39 +242,51 @@ def forward(self, bottom, top): 
top[0].data[...] = labels # bbox_targets - bbox_targets = bbox_targets \ - .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) + bbox_targets = bbox_targets.reshape( + (1, height, width, A * 4) + ).transpose(0, 3, 1, 2) + top[1].reshape(*bbox_targets.shape) top[1].data[...] = bbox_targets # bbox_inside_weights - bbox_inside_weights = bbox_inside_weights \ - .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) - assert bbox_inside_weights.shape[2] == height - assert bbox_inside_weights.shape[3] == width + bbox_inside_weights = bbox_inside_weights.reshape( + (1, height, width, A * 4) + ).transpose(0, 3, 1, 2) + + assert(bbox_inside_weights.shape[2] == height) + assert(bbox_inside_weights.shape[3] == width) top[2].reshape(*bbox_inside_weights.shape) top[2].data[...] = bbox_inside_weights # bbox_outside_weights - bbox_outside_weights = bbox_outside_weights \ - .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) - assert bbox_outside_weights.shape[2] == height - assert bbox_outside_weights.shape[3] == width + bbox_outside_weights = bbox_outside_weights.reshape( + (1, height, width, A * 4) + ).transpose(0, 3, 1, 2) + + assert(bbox_outside_weights.shape[2] == height) + assert(bbox_outside_weights.shape[3] == width) top[3].reshape(*bbox_outside_weights.shape) top[3].data[...] = bbox_outside_weights def backward(self, top, propagate_down, bottom): - """This layer does not propagate gradients.""" + """ + This layer does not propagate gradients. + """ pass def reshape(self, bottom, top): - """Reshaping happens during the call to forward.""" + """ + Reshaping happens during the call to forward. + """ pass def _unmap(data, count, inds, fill=0): - """ Unmap a subset of item (data) back to the original set of items (of - size count) """ + """ + Unmap a subset of item (data) back to the original set of items (of + size count) + """ if len(data.shape) == 1: ret = np.empty((count, ), dtype=np.float32) ret.fill(fill) @@ -272,10 +299,14 @@ def _unmap(data, count, inds, fill=0): def _compute_targets(ex_rois, gt_rois): - """Compute bounding-box regression targets for an image.""" + """ + Compute bounding-box regression targets for an image. + """ - assert ex_rois.shape[0] == gt_rois.shape[0] - assert ex_rois.shape[1] == 4 - assert gt_rois.shape[1] == 5 + assert(ex_rois.shape[0] == gt_rois.shape[0]) + assert(ex_rois.shape[1] == 4) + assert(gt_rois.shape[1] == 5) - return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False) + return bbox_transform( + ex_rois, gt_rois[:, :4] + ).astype(np.float32, copy=False) diff --git a/lib/rpn/generate.py b/lib/rpn/generate.py index 060daf434..177d9a678 100644 --- a/lib/rpn/generate.py +++ b/lib/rpn/generate.py @@ -11,6 +11,7 @@ import numpy as np import cv2 + def _vis_proposals(im, dets, thresh=0.5): """Draw detected bounding boxes.""" inds = np.where(dets[:, -1] >= thresh)[0] @@ -36,16 +37,19 @@ def _vis_proposals(im, dets, thresh=0.5): bbox=dict(facecolor='blue', alpha=0.5), fontsize=14, color='white') - ax.set_title(('{} detections with ' - 'p({} | box) >= {:.1f}').format(class_name, class_name, - thresh), - fontsize=14) + ax.set_title( + '{} detections with p({} | box) >= {:.1f}'.format( + class_name, class_name, thresh), + fontsize=14 + ) plt.axis('off') plt.tight_layout() plt.draw() + def _get_image_blob(im): - """Converts an image into a network input. + """ + Converts an image into a network input. 
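Since every print statement in this patch becomes a call, adding the future import to each converted module would keep Python 2 behavior identical as well; without it, a multi-argument `print('a', b)` on Python 2 prints a tuple rather than two values. A minimal sketch:

    # At the top of each converted module, next to its existing imports
    from __future__ import print_function

    # Prints "rpn: gt_boxes.shape (5, 5)" on both Python 2 and 3,
    # instead of a tuple repr on Python 2
    print('rpn: gt_boxes.shape', (5, 5))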
Arguments:
        im (ndarray): a color image in BGR order

@@ -64,15 +68,19 @@ def _get_image_blob(im):

     processed_ims = []

-    assert len(cfg.TEST.SCALES) == 1
+    assert(len(cfg.TEST.SCALES) == 1)
     target_size = cfg.TEST.SCALES[0]

     im_scale = float(target_size) / float(im_size_min)
     # Prevent the biggest axis from being more than MAX_SIZE
     if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
         im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
-    im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
-                    interpolation=cv2.INTER_LINEAR)
+
+    im = cv2.resize(
+        im_orig, None, None, fx=im_scale, fy=im_scale,
+        interpolation=cv2.INTER_LINEAR
+    )
+
     im_info = np.hstack((im.shape[:2], im_scale))[np.newaxis, :]
     processed_ims.append(im)

@@ -81,6 +89,7 @@ def _get_image_blob(im):

     return blob, im_info

+
 def im_proposals(net, im):
     """Generate RPN proposals on a single image."""
     blobs = {}
@@ -96,6 +105,7 @@ def im_proposals(net, im):
     scores = blobs_out['scores'].copy()
     return boxes, scores

+
 def imdb_proposals(net, imdb):
     """Generate RPN proposals on all images in an imdb."""

@@ -106,8 +116,11 @@ def imdb_proposals(net, imdb):
         _t.tic()
         imdb_boxes[i], scores = im_proposals(net, im)
         _t.toc()
-        print 'im_proposals: {:d}/{:d} {:.3f}s' \
-            .format(i + 1, imdb.num_images, _t.average_time)
+
+        print('im_proposals: {:d}/{:d} {:.3f}s'.format(
+            i + 1, imdb.num_images, _t.average_time
+        ))
         if 0:
             dets = np.hstack((imdb_boxes[i], scores))
             # from IPython import embed; embed()
diff --git a/lib/rpn/generate_anchors.py b/lib/rpn/generate_anchors.py
index 1125a801f..d4c90e9f2 100644
--- a/lib/rpn/generate_anchors.py
+++ b/lib/rpn/generate_anchors.py
@@ -24,7 +24,7 @@
 #        -79  -167    96   184
 #       -167  -343   184   360

-#array([[ -83.,  -39.,  100.,   56.],
+# array([[ -83.,  -39.,  100.,   56.],
 #        [-175.,  -87.,  192.,  104.],
 #        [-359., -183.,  376.,  200.],
 #        [ -55.,  -55.,   72.,   72.],
@@ -34,6 +34,7 @@
 #        [ -79., -167.,   96.,  184.],
 #        [-167., -343.,  184.,  360.]])

+
 def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
                      scales=2**np.arange(3, 6)):
     """
@@ -43,10 +44,14 @@ def generate_anchors(base_size=16, ratios=[0.5, 1, 2],

     base_anchor = np.array([1, 1, base_size, base_size]) - 1
     ratio_anchors = _ratio_enum(base_anchor, ratios)
-    anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
-                         for i in xrange(ratio_anchors.shape[0])])
+    anchors = np.vstack(
+        [_scale_enum(ratio_anchors[i, :], scales)
+         for i in range(ratio_anchors.shape[0])]
+    )
+
     return anchors

+
 def _whctrs(anchor):
     """
     Return width, height, x center, and y center for an anchor (window).
@@ -56,8 +61,10 @@ def _whctrs(anchor):
     h = anchor[3] - anchor[1] + 1
     x_ctr = anchor[0] + 0.5 * (w - 1)
     y_ctr = anchor[1] + 0.5 * (h - 1)
+
     return w, h, x_ctr, y_ctr

+
 def _mkanchors(ws, hs, x_ctr, y_ctr):
     """
     Given a vector of widths (ws) and heights (hs) around a center
@@ -72,6 +79,7 @@ def _mkanchors(ws, hs, x_ctr, y_ctr):
                          y_ctr + 0.5 * (hs - 1)))
     return anchors

+
 def _ratio_enum(anchor, ratios):
     """
     Enumerate a set of anchors for each aspect ratio wrt an anchor.
@@ -83,8 +91,10 @@ def _ratio_enum(anchor, ratios):
     ws = np.round(np.sqrt(size_ratios))
     hs = np.round(ws * ratios)
     anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
+
     return anchors

+
 def _scale_enum(anchor, scales):
     """
     Enumerate a set of anchors for each scale wrt an anchor.
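The comprehension above now uses `range`, but several untouched context lines in this patch (for example the loops in minibatch.py, roidb.py, and demo.py) still call `xrange`, which raises `NameError` on Python 3. A common compatibility sketch, assuming no dependency on six is wanted:

    # Python 2/3 shim: xrange was removed in Python 3, where range is lazy
    try:
        xrange  # noqa: F821 -- exists only on Python 2
    except NameError:
        xrange = range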
@@ -94,12 +104,15 @@ def _scale_enum(anchor, scales):
     ws = w * scales
     hs = h * scales
     anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
+
     return anchors


 if __name__ == '__main__':
     import time
     t = time.time()
     a = generate_anchors()
-    print time.time() - t
-    print a
-    from IPython import embed; embed()
+    print(time.time() - t)
+    print(a)
+
+    from IPython import embed
+    embed()
diff --git a/lib/rpn/proposal_layer.py b/lib/rpn/proposal_layer.py
index b157160b3..2fd14f2ff 100644
--- a/lib/rpn/proposal_layer.py
+++ b/lib/rpn/proposal_layer.py
@@ -15,6 +15,7 @@

 DEBUG = False

+
 class ProposalLayer(caffe.Layer):
     """
     Outputs object detection proposals by applying estimated bounding-box
@@ -31,9 +32,9 @@ def setup(self, bottom, top):
         self._num_anchors = self._anchors.shape[0]

         if DEBUG:
-            print 'feat_stride: {}'.format(self._feat_stride)
-            print 'anchors:'
-            print self._anchors
+            print('feat_stride: {}'.format(self._feat_stride))
+            print('anchors:')
+            print(self._anchors)

         # rois blob: holds R regions of interest, each is a 5-tuple
         # (n, x1, y1, x2, y2) specifying an image batch index n and a
@@ -58,14 +59,16 @@ def forward(self, bottom, top):
         # take after_nms_topN proposals after NMS
         # return the top proposals (-> RoIs top, scores top)

         assert bottom[0].data.shape[0] == 1, \
             'Only single item batches are supported'

-        cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'
-        pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
+        cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
+        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
         post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
-        nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH
-        min_size      = cfg[cfg_key].RPN_MIN_SIZE
+        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
+        min_size = cfg[cfg_key].RPN_MIN_SIZE

         # the first set of _num_anchors channels are bg probs
         # the second set are the fg probs, which we want
@@ -74,14 +77,14 @@ def forward(self, bottom, top):
         im_info = bottom[2].data[0, :]

         if DEBUG:
-            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
-            print 'scale: {}'.format(im_info[2])
+            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
+            print('scale: {}'.format(im_info[2]))

         # 1. Generate proposals from bbox deltas and shifted anchors
         height, width = scores.shape[-2:]

         if DEBUG:
-            print 'score map size: {}'.format(scores.shape)
+            print('score map size: {}'.format(scores.shape))

         # Enumerate all shifts
         shift_x = np.arange(0, width) * self._feat_stride
@@ -98,8 +101,10 @@ def forward(self, bottom, top):
         # reshape to (K*A, 4) shifted anchors
         A = self._num_anchors
         K = shifts.shape[0]
-        anchors = self._anchors.reshape((1, A, 4)) + \
-            shifts.reshape((1, K, 4)).transpose((1, 0, 2))
+        anchors = self._anchors.reshape((1, A, 4)) + shifts.reshape(
+            (1, K, 4)
+        ).transpose((1, 0, 2))
+
         anchors = anchors.reshape((K * A, 4))

         # Transpose and reshape predicted bbox transformations to get them
@@ -133,8 +138,10 @@ def forward(self, bottom, top):
         # 4. sort all (proposal, score) pairs by score from highest to lowest
         # 5. take top pre_nms_topN (e.g. 6000)
         order = scores.ravel().argsort()[::-1]
+
         if pre_nms_topN > 0:
             order = order[:pre_nms_topN]
+
         proposals = proposals[order, :]
         scores = scores[order]

@@ -142,8 +149,10 @@ def forward(self, bottom, top):
         # 7. take after_nms_topN (e.g. 300)
         # 8.
return the top proposals (-> RoIs top)
         keep = nms(np.hstack((proposals, scores)), nms_thresh)
+
         if post_nms_topN > 0:
             keep = keep[:post_nms_topN]
+
         proposals = proposals[keep, :]
         scores = scores[keep]

@@ -151,7 +160,9 @@ def forward(self, bottom, top):
         # Our RPN implementation only supports a single input image, so all
         # batch inds are 0
         batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
-        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
+        blob = np.hstack(
+            (batch_inds, proposals.astype(np.float32, copy=False))
+        )
         top[0].reshape(*(blob.shape))
         top[0].data[...] = blob

@@ -161,16 +172,24 @@ def forward(self, bottom, top):
         top[1].data[...] = scores

     def backward(self, top, propagate_down, bottom):
-        """This layer does not propagate gradients."""
+        """
+        This layer does not propagate gradients.
+        """
         pass

     def reshape(self, bottom, top):
-        """Reshaping happens during the call to forward."""
+        """
+        Reshaping happens during the call to forward.
+        """
         pass

+
 def _filter_boxes(boxes, min_size):
-    """Remove all boxes with any side smaller than min_size."""
+    """
+    Remove all boxes with any side smaller than min_size.
+    """
     ws = boxes[:, 2] - boxes[:, 0] + 1
     hs = boxes[:, 3] - boxes[:, 1] + 1
     keep = np.where((ws >= min_size) & (hs >= min_size))[0]
+
     return keep
diff --git a/lib/rpn/proposal_target_layer.py b/lib/rpn/proposal_target_layer.py
index 38e1f2c88..b7440161b 100644
--- a/lib/rpn/proposal_target_layer.py
+++ b/lib/rpn/proposal_target_layer.py
@@ -15,6 +15,7 @@

 DEBUG = False

+
 class ProposalTargetLayer(caffe.Layer):
     """
     Assign object detection proposals to ground-truth targets. Produces proposal
@@ -66,14 +69,14 @@ def forward(self, bottom, top):
             rois_per_image, self._num_classes)

         if DEBUG:
-            print 'num fg: {}'.format((labels > 0).sum())
-            print 'num bg: {}'.format((labels == 0).sum())
+            print('num fg: {}'.format((labels > 0).sum()))
+            print('num bg: {}'.format((labels == 0).sum()))
             self._count += 1
             self._fg_num += (labels > 0).sum()
             self._bg_num += (labels == 0).sum()
-            print 'num fg avg: {}'.format(self._fg_num / self._count)
-            print 'num bg avg: {}'.format(self._bg_num / self._count)
-            print 'ratio: {:.3f}'.format(float(self._fg_num) / float(self._bg_num))
+            print('num fg avg: {}'.format(self._fg_num / self._count))
+            print('num bg avg: {}'.format(self._bg_num / self._count))
+            print('ratio: {:.3f}'.format(
+                float(self._fg_num) / float(self._bg_num)
+            ))

         # sampled rois
         top[0].reshape(*rois.shape)
@@ -96,16 +99,21 @@ def forward(self, bottom, top):
         top[4].data[...] = np.array(bbox_inside_weights > 0).astype(np.float32)

     def backward(self, top, propagate_down, bottom):
-        """This layer does not propagate gradients."""
+        """
+        This layer does not propagate gradients.
+        """
         pass

     def reshape(self, bottom, top):
-        """Reshaping happens during the call to forward."""
+        """
+        Reshaping happens during the call to forward.
+ """ pass def _get_bbox_regression_labels(bbox_target_data, num_classes): - """Bounding-box regression targets (bbox_target_data) are stored in a + """ + Bounding-box regression targets (bbox_target_data) are stored in a compact form N x (class, tx, ty, tw, th) This function expands those targets into the 4-of-4*K representation used @@ -126,32 +134,44 @@ def _get_bbox_regression_labels(bbox_target_data, num_classes): end = start + 4 bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS + return bbox_targets, bbox_inside_weights def _compute_targets(ex_rois, gt_rois, labels): - """Compute bounding-box regression targets for an image.""" + """ + Compute bounding-box regression targets for an image. + """ - assert ex_rois.shape[0] == gt_rois.shape[0] - assert ex_rois.shape[1] == 4 - assert gt_rois.shape[1] == 4 + assert(ex_rois.shape[0] == gt_rois.shape[0]) + assert(ex_rois.shape[1] == 4) + assert(gt_rois.shape[1] == 4) targets = bbox_transform(ex_rois, gt_rois) if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize targets by a precomputed mean and stdev - targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) - / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)) + targets = ( + (targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) / np.array( + cfg.TRAIN.BBOX_NORMALIZE_STDS + ) + ) + return np.hstack( - (labels[:, np.newaxis], targets)).astype(np.float32, copy=False) + (labels[:, np.newaxis], targets) + ).astype(np.float32, copy=False) + def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes): - """Generate a random sample of RoIs comprising foreground and background + """ + Generate a random sample of RoIs comprising foreground and background examples. 
""" # overlaps: (rois x gt_boxes) overlaps = bbox_overlaps( np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float), - np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float)) + np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float) + ) + gt_assignment = overlaps.argmax(axis=1) max_overlaps = overlaps.max(axis=1) labels = gt_boxes[gt_assignment, 4] @@ -163,7 +183,9 @@ def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_clas fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size) # Sample foreground regions without replacement if fg_inds.size > 0: - fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False) + fg_inds = npr.choice( + fg_inds, size=fg_rois_per_this_image, replace=False + ) # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) & @@ -172,9 +194,12 @@ def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_clas # against there being fewer than desired) bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size) + # Sample background regions without replacement if bg_inds.size > 0: - bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) + bg_inds = npr.choice( + bg_inds, size=bg_rois_per_this_image, replace=False + ) # The indices that we're selecting (both fg and bg) keep_inds = np.append(fg_inds, bg_inds) diff --git a/lib/setup.py b/lib/setup.py index 0f4615f70..c7b88d8e5 100644 --- a/lib/setup.py +++ b/lib/setup.py @@ -13,6 +13,7 @@ import subprocess import numpy as np + def find_in_path(name, path): "Find a file in a search path" # Adapted fom @@ -43,16 +44,23 @@ def locate_cuda(): default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) if nvcc is None: - raise EnvironmentError('The nvcc binary could not be ' - 'located in your $PATH. Either add it to your path, or set $CUDAHOME') + raise EnvironmentError( + 'The nvcc binary could not be located in your $PATH.' + 'Either add it to your path, or set $CUDAHOME' + ) home = os.path.dirname(os.path.dirname(nvcc)) - cudaconfig = {'home':home, 'nvcc':nvcc, - 'include': pjoin(home, 'include'), - 'lib64': pjoin(home, 'lib64')} + cudaconfig = { + 'home': home, 'nvcc': nvcc, + 'include': pjoin(home, 'include'), + 'lib64': pjoin(home, 'lib64') + } + for k, v in cudaconfig.iteritems(): if not os.path.exists(v): - raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) + raise EnvironmentError( + 'The CUDA %s path could not be located in %s' % (k, v) + ) return cudaconfig CUDA = locate_cuda() @@ -64,6 +72,7 @@ def locate_cuda(): except AttributeError: numpy_include = np.get_numpy_include() + def customize_compiler_for_nvcc(self): """inject deep into distutils to customize how the dispatch to gcc/nvcc works. 
@@ -114,15 +123,16 @@ def build_extensions(self): "utils.cython_bbox", ["utils/bbox.pyx"], extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, - include_dirs = [numpy_include] + include_dirs=[numpy_include] ), Extension( "nms.cpu_nms", ["nms/cpu_nms.pyx"], extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, - include_dirs = [numpy_include] + include_dirs=[numpy_include] ), - Extension('nms.gpu_nms', + Extension( + 'nms.gpu_nms', ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'], library_dirs=[CUDA['lib64']], libraries=['cudart'], @@ -131,18 +141,22 @@ def build_extensions(self): # this syntax is specific to this build system # we're only going to use certain compiler args with nvcc and not with # gcc the implementation of this trick is in customize_compiler() below - extra_compile_args={'gcc': ["-Wno-unused-function"], - 'nvcc': ['-arch=sm_35', - '--ptxas-options=-v', - '-c', - '--compiler-options', - "'-fPIC'"]}, - include_dirs = [numpy_include, CUDA['include']] + extra_compile_args={ + 'gcc': ["-Wno-unused-function"], + 'nvcc': [ + '-arch=sm_35', + '--ptxas-options=-v', + '-c', + '--compiler-options', + "'-fPIC'" + ] + }, + include_dirs=[numpy_include, CUDA['include']] ), Extension( 'pycocotools._mask', sources=['pycocotools/maskApi.c', 'pycocotools/_mask.pyx'], - include_dirs = [numpy_include, 'pycocotools'], + include_dirs=[numpy_include, 'pycocotools'], extra_compile_args={ 'gcc': ['-Wno-cpp', '-Wno-unused-function', '-std=c99']}, ), diff --git a/lib/transform/torch_image_transform_layer.py b/lib/transform/torch_image_transform_layer.py index 9273b3a76..c6a209d3f 100644 --- a/lib/transform/torch_image_transform_layer.py +++ b/lib/transform/torch_image_transform_layer.py @@ -24,6 +24,7 @@ from fast_rcnn.config import cfg import numpy as np + class TorchImageTransformLayer(caffe.Layer): def setup(self, bottom, top): # (1, 3, 1, 1) shaped arrays diff --git a/lib/utils/blob.py b/lib/utils/blob.py index 1c316427a..adab9df44 100644 --- a/lib/utils/blob.py +++ b/lib/utils/blob.py @@ -10,6 +10,7 @@ import numpy as np import cv2 + def im_list_to_blob(ims): """Convert a list of images into a network input. 
@@ -28,6 +29,7 @@ def im_list_to_blob(ims): blob = blob.transpose(channel_swap) return blob + def prep_im_for_blob(im, pixel_means, target_size, max_size): """Mean subtract and scale an image for use in a blob.""" im = im.astype(np.float32, copy=False) diff --git a/lib/utils/timer.py b/lib/utils/timer.py index dacc942c2..a7bf4c7a4 100644 --- a/lib/utils/timer.py +++ b/lib/utils/timer.py @@ -7,6 +7,7 @@ import time + class Timer(object): """A simple timer.""" def __init__(self): @@ -26,6 +27,7 @@ def toc(self, average=True): self.total_time += self.diff self.calls += 1 self.average_time = self.total_time / self.calls + if average: return self.average_time else: diff --git a/tools/compress_net.py b/tools/compress_net.py index e044e5bcd..f4d25deee 100755 --- a/tools/compress_net.py +++ b/tools/compress_net.py @@ -13,20 +13,30 @@ import caffe import argparse import numpy as np -import os, sys +import os +import sys + def parse_args(): """Parse input arguments.""" - parser = argparse.ArgumentParser(description='Compress a Fast R-CNN network') - parser.add_argument('--def', dest='prototxt', - help='prototxt file defining the uncompressed network', - default=None, type=str) - parser.add_argument('--def-svd', dest='prototxt_svd', - help='prototxt file defining the SVD compressed network', - default=None, type=str) - parser.add_argument('--net', dest='caffemodel', - help='model to compress', - default=None, type=str) + parser = argparse.ArgumentParser( + description='Compress a Fast R-CNN network' + ) + parser.add_argument( + '--def', dest='prototxt', + help='prototxt file defining the uncompressed network', + default=None, type=str + ) + parser.add_argument( + '--def-svd', dest='prototxt_svd', + help='prototxt file defining the SVD compressed network', + default=None, type=str + ) + parser.add_argument( + '--net', dest='caffemodel', + help='model to compress', + default=None, type=str + ) if len(sys.argv) == 1: parser.print_help() @@ -35,8 +45,10 @@ def parse_args(): args = parser.parse_args() return args + def compress_weights(W, l): - """Compress the weight matrix W of an inner product (fully connected) layer + """ + Compress the weight matrix W of an inner product (fully connected) layer using truncated SVD. 
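Several tools later in this patch (reval.py, rpn_generate.py, train_faster_rcnn_alt_opt.py) still `import cPickle`, a module that no longer exists on Python 3. A compatibility sketch consistent with the patch's goal; the filename below is illustrative:

    # cPickle was folded into the pickle module in Python 3
    try:
        import cPickle as pickle  # Python 2 C implementation
    except ImportError:
        import pickle

    with open('detections.pkl', 'rb') as f:
        dets = pickle.load(f)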
Parameters: @@ -56,8 +68,10 @@ def compress_weights(W, l): Vl = V[:l, :] L = np.dot(np.diag(sl), Vl) + return Ul, L + def main(): args = parse_args() @@ -69,14 +83,16 @@ def main(): # caffemodel = 'snapshots/vgg16_fast_rcnn_iter_40000.caffemodel' net_svd = caffe.Net(args.prototxt_svd, args.caffemodel, caffe.TEST) - print('Uncompressed network {} : {}'.format(args.prototxt, args.caffemodel)) + print( + 'Uncompressed network {} : {}'.format(args.prototxt, args.caffemodel) + ) print('Compressed network prototxt {}'.format(args.prototxt_svd)) out = os.path.splitext(os.path.basename(args.caffemodel))[0] + '_svd' out_dir = os.path.dirname(args.caffemodel) # Compress fc6 - if net_svd.params.has_key('fc6_L'): + if 'fc6_L' in net_svd.params: l_fc6 = net_svd.params['fc6_L'][0].data.shape[0] print(' fc6_L bottleneck size: {}'.format(l_fc6)) @@ -98,9 +114,9 @@ def main(): out += '_fc6_{}'.format(l_fc6) # Compress fc7 - if net_svd.params.has_key('fc7_L'): + if 'fc7_L' in net_svd.params: l_fc7 = net_svd.params['fc7_L'][0].data.shape[0] - print ' fc7_L bottleneck size: {}'.format(l_fc7) + print(' fc7_L bottleneck size: {}'.format(l_fc7)) W_fc7 = net.params['fc7'][0].data B_fc7 = net.params['fc7'][1].data @@ -119,7 +135,7 @@ def main(): filename = '{}/{}.caffemodel'.format(out_dir, out) net_svd.save(filename) - print 'Wrote svd model to: {:s}'.format(filename) + print('Wrote svd model to: {:s}'.format(filename)) if __name__ == '__main__': main() diff --git a/tools/demo.py b/tools/demo.py index 631c68a41..d27cf8035 100755 --- a/tools/demo.py +++ b/tools/demo.py @@ -21,25 +21,39 @@ import matplotlib.pyplot as plt import numpy as np import scipy.io as sio -import caffe, os, sys, cv2 +import caffe +import os +import sys +import cv2 import argparse -CLASSES = ('__background__', - 'aeroplane', 'bicycle', 'bird', 'boat', - 'bottle', 'bus', 'car', 'cat', 'chair', - 'cow', 'diningtable', 'dog', 'horse', - 'motorbike', 'person', 'pottedplant', - 'sheep', 'sofa', 'train', 'tvmonitor') - -NETS = {'vgg16': ('VGG16', - 'VGG16_faster_rcnn_final.caffemodel'), - 'zf': ('ZF', - 'ZF_faster_rcnn_final.caffemodel')} +CLASSES = ( + '__background__', + 'aeroplane', 'bicycle', 'bird', 'boat', + 'bottle', 'bus', 'car', 'cat', 'chair', + 'cow', 'diningtable', 'dog', 'horse', + 'motorbike', 'person', 'pottedplant', + 'sheep', 'sofa', 'train', 'tvmonitor' +) + +NETS = { + 'vgg16': ( + 'VGG16', + 'VGG16_faster_rcnn_final.caffemodel' + ), + 'zf': ( + 'ZF', + 'ZF_faster_rcnn_final.caffemodel' + ) +} def vis_detections(im, class_name, dets, thresh=0.5): - """Draw detected bounding boxes.""" + """ + Draw detected bounding boxes. 
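For `compress_weights` above, a quick way to validate the truncated SVD is to check the relative error of the rank-l reconstruction; a small self-contained sketch with an illustrative weight shape and bottleneck size:

    import numpy as np

    W = np.random.randn(256, 512).astype(np.float32)  # example fc weights
    l = 64                                            # hypothetical bottleneck
    U, s, V = np.linalg.svd(W, full_matrices=False)
    Ul = U[:, :l]                           # weights of the first fc layer
    L = np.dot(np.diag(s[:l]), V[:l, :])    # weights of the second fc layer
    # W is approximated by the product of the two smaller layers
    err = np.linalg.norm(W - Ul.dot(L)) / np.linalg.norm(W)
    print('relative reconstruction error: {:.4f}'.format(err))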
+ """ inds = np.where(dets[:, -1] >= thresh)[0] + if len(inds) == 0: return @@ -61,14 +75,15 @@ def vis_detections(im, class_name, dets, thresh=0.5): bbox=dict(facecolor='blue', alpha=0.5), fontsize=14, color='white') - ax.set_title(('{} detections with ' - 'p({} | box) >= {:.1f}').format(class_name, class_name, - thresh), - fontsize=14) + ax.set_title('{} detections with p({} | box) >= {:.1f}'.format( + class_name, class_name, thresh), + fontsize=14 + ) plt.axis('off') plt.tight_layout() plt.draw() + def demo(net, image_name): """Detect object classes in an image using pre-computed object proposals.""" @@ -81,32 +96,45 @@ def demo(net, image_name): timer.tic() scores, boxes = im_detect(net, im) timer.toc() - print ('Detection took {:.3f}s for ' - '{:d} object proposals').format(timer.total_time, boxes.shape[0]) + + print('Detection took {:.3f}s for {:d} object proposals'.format( + timer.total_time, boxes.shape[0] + ) + ) # Visualize detections for each class CONF_THRESH = 0.8 NMS_THRESH = 0.3 for cls_ind, cls in enumerate(CLASSES[1:]): - cls_ind += 1 # because we skipped background + cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] cls_scores = scores[:, cls_ind] - dets = np.hstack((cls_boxes, - cls_scores[:, np.newaxis])).astype(np.float32) + + dets = np.hstack( + (cls_boxes, cls_scores[:, np.newaxis]) + ).astype(np.float32) + keep = nms(dets, NMS_THRESH) dets = dets[keep, :] vis_detections(im, cls, dets, thresh=CONF_THRESH) + def parse_args(): """Parse input arguments.""" parser = argparse.ArgumentParser(description='Faster R-CNN demo') - parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]', - default=0, type=int) - parser.add_argument('--cpu', dest='cpu_mode', - help='Use CPU mode (overrides --gpu)', - action='store_true') - parser.add_argument('--net', dest='demo_net', help='Network to use [vgg16]', - choices=NETS.keys(), default='vgg16') + parser.add_argument( + '--gpu', dest='gpu_id', help='GPU device id to use [0]', + default=0, type=int + ) + parser.add_argument( + '--cpu', dest='cpu_mode', + help='Use CPU mode (overrides --gpu)', + action='store_true' + ) + parser.add_argument( + '--net', dest='demo_net', help='Network to use [vgg16]', + choices=NETS.keys(), default='vgg16' + ) args = parser.parse_args() @@ -134,18 +162,21 @@ def parse_args(): cfg.GPU_ID = args.gpu_id net = caffe.Net(prototxt, caffemodel, caffe.TEST) - print '\n\nLoaded network {:s}'.format(caffemodel) + print('\n\nLoaded network {:s}'.format(caffemodel)) # Warmup on a dummy image im = 128 * np.ones((300, 500, 3), dtype=np.uint8) for i in xrange(2): - _, _= im_detect(net, im) + _, _ = im_detect(net, im) + + im_names = [ + '000456.jpg', '000542.jpg', '001150.jpg', + '001763.jpg', '004545.jpg' + ] - im_names = ['000456.jpg', '000542.jpg', '001150.jpg', - '001763.jpg', '004545.jpg'] for im_name in im_names: - print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' - print 'Demo for data/demo/{}'.format(im_name) + print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') + print('Demo for data/demo/{}'.format(im_name)) demo(net, im_name) plt.show() diff --git a/tools/eval_recall.py b/tools/eval_recall.py index b1a59dc27..61e25bb9f 100755 --- a/tools/eval_recall.py +++ b/tools/eval_recall.py @@ -4,22 +4,31 @@ from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list from datasets.factory import get_imdb import argparse -import time, os, sys +import time +import os +import sys import numpy as np + def parse_args(): """ Parse input arguments """ parser = 
argparse.ArgumentParser(description='Test a Fast R-CNN network') - parser.add_argument('--imdb', dest='imdb_name', - help='dataset to test', - default='voc_2007_test', type=str) - parser.add_argument('--method', dest='method', - help='proposal method', - default='selective_search', type=str) - parser.add_argument('--rpn-file', dest='rpn_file', - default=None, type=str) + parser.add_argument( + '--imdb', dest='imdb_name', + help='dataset to test', + default='voc_2007_test', type=str + ) + parser.add_argument( + '--method', dest='method', + help='proposal method', + default='selective_search', type=str + ) + parser.add_argument( + '--rpn-file', dest='rpn_file', + default=None, type=str + ) if len(sys.argv) == 1: parser.print_help() @@ -46,25 +55,28 @@ def parse_args(): raw_data = sio.loadmat(filename)['aboxes'].ravel() candidate_boxes = raw_data - ar, gt_overlaps, recalls, thresholds = \ - imdb.evaluate_recall(candidate_boxes=candidate_boxes) - print 'Method: {}'.format(args.method) - print 'AverageRec: {:.3f}'.format(ar) + ar, gt_overlaps, recalls, thresholds = imdb.evaluate_recall( + candidate_boxes=candidate_boxes + ) + + print('Method: {}'.format(args.method)) + print('AverageRec: {:.3f}'.format(ar)) def recall_at(t): ind = np.where(thresholds > t - 1e-5)[0][0] - assert np.isclose(thresholds[ind], t) + assert(np.isclose(thresholds[ind], t)) + return recalls[ind] - print 'Recall@0.5: {:.3f}'.format(recall_at(0.5)) - print 'Recall@0.6: {:.3f}'.format(recall_at(0.6)) - print 'Recall@0.7: {:.3f}'.format(recall_at(0.7)) - print 'Recall@0.8: {:.3f}'.format(recall_at(0.8)) - print 'Recall@0.9: {:.3f}'.format(recall_at(0.9)) + print('Recall@0.5: {:.3f}'.format(recall_at(0.5))) + print('Recall@0.6: {:.3f}'.format(recall_at(0.6))) + print('Recall@0.7: {:.3f}'.format(recall_at(0.7))) + print('Recall@0.8: {:.3f}'.format(recall_at(0.8))) + print('Recall@0.9: {:.3f}'.format(recall_at(0.9))) # print again for easy spreadsheet copying - print '{:.3f}'.format(ar) - print '{:.3f}'.format(recall_at(0.5)) - print '{:.3f}'.format(recall_at(0.6)) - print '{:.3f}'.format(recall_at(0.7)) - print '{:.3f}'.format(recall_at(0.8)) - print '{:.3f}'.format(recall_at(0.9)) + print('{:.3f}'.format(ar)) + print('{:.3f}'.format(recall_at(0.5))) + print('{:.3f}'.format(recall_at(0.6))) + print('{:.3f}'.format(recall_at(0.7))) + print('{:.3f}'.format(recall_at(0.8))) + print('{:.3f}'.format(recall_at(0.9))) diff --git a/tools/reval.py b/tools/reval.py index 905ec1b14..7b3dc95f1 100755 --- a/tools/reval.py +++ b/tools/reval.py @@ -14,26 +14,39 @@ from fast_rcnn.config import cfg from datasets.factory import get_imdb import cPickle -import os, sys, argparse +import os +import sys +import argparse import numpy as np + def parse_args(): """ Parse input arguments """ parser = argparse.ArgumentParser(description='Re-evaluate results') - parser.add_argument('output_dir', nargs=1, help='results directory', - type=str) - parser.add_argument('--imdb', dest='imdb_name', - help='dataset to re-evaluate', - default='voc_2007_test', type=str) - parser.add_argument('--matlab', dest='matlab_eval', - help='use matlab for evaluation', - action='store_true') - parser.add_argument('--comp', dest='comp_mode', help='competition mode', - action='store_true') - parser.add_argument('--nms', dest='apply_nms', help='apply nms', - action='store_true') + parser.add_argument( + 'output_dir', nargs=1, help='results directory', + type=str + ) + parser.add_argument( + '--imdb', dest='imdb_name', + help='dataset to re-evaluate', + default='voc_2007_test', 
type=str + ) + parser.add_argument( + '--matlab', dest='matlab_eval', + help='use matlab for evaluation', + action='store_true' + ) + parser.add_argument( + '--comp', dest='comp_mode', help='competition mode', + action='store_true' + ) + parser.add_argument( + '--nms', dest='apply_nms', help='apply nms', + action='store_true' + ) if len(sys.argv) == 1: parser.print_help() @@ -42,20 +55,22 @@ def parse_args(): args = parser.parse_args() return args + def from_dets(imdb_name, output_dir, args): imdb = get_imdb(imdb_name) imdb.competition_mode(args.comp_mode) imdb.config['matlab_eval'] = args.matlab_eval + with open(os.path.join(output_dir, 'detections.pkl'), 'rb') as f: dets = cPickle.load(f) if args.apply_nms: - print 'Applying NMS to all detections' + print('Applying NMS to all detections') nms_dets = apply_nms(dets, cfg.TEST.NMS) else: nms_dets = dets - print 'Evaluating detections' + print('Evaluating detections') imdb.evaluate_detections(nms_dets, output_dir) if __name__ == '__main__': diff --git a/tools/rpn_generate.py b/tools/rpn_generate.py index f8ca4a167..16e942c65 100755 --- a/tools/rpn_generate.py +++ b/tools/rpn_generate.py @@ -18,38 +18,55 @@ import caffe import argparse import pprint -import time, os, sys +import time +import os +import sys + def parse_args(): """ Parse input arguments """ parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') - parser.add_argument('--gpu', dest='gpu_id', help='GPU id to use', - default=0, type=int) - parser.add_argument('--def', dest='prototxt', - help='prototxt file defining the network', - default=None, type=str) - parser.add_argument('--net', dest='caffemodel', - help='model to test', - default=None, type=str) - parser.add_argument('--cfg', dest='cfg_file', - help='optional config file', default=None, type=str) - parser.add_argument('--wait', dest='wait', - help='wait until net file exists', - default=True, type=bool) - parser.add_argument('--imdb', dest='imdb_name', - help='dataset to test', - default='voc_2007_test', type=str) - parser.add_argument('--set', dest='set_cfgs', - help='set config keys', default=None, - nargs=argparse.REMAINDER) + parser.add_argument( + '--gpu', dest='gpu_id', help='GPU id to use', + default=0, type=int + ) + parser.add_argument( + '--def', dest='prototxt', + help='prototxt file defining the network', + default=None, type=str + ) + parser.add_argument( + '--net', dest='caffemodel', + help='model to test', + default=None, type=str + ) + parser.add_argument( + '--cfg', dest='cfg_file', + help='optional config file', default=None, type=str) + parser.add_argument( + '--wait', dest='wait', + help='wait until net file exists', + default=True, type=bool + ) + parser.add_argument( + '--imdb', dest='imdb_name', + help='dataset to test', + default='voc_2007_test', type=str + ) + parser.add_argument( + '--set', dest='set_cfgs', + help='set config keys', default=None, + nargs=argparse.REMAINDER + ) if len(sys.argv) == 1: parser.print_help() sys.exit(1) args = parser.parse_args() + return args if __name__ == '__main__': @@ -88,4 +105,4 @@ def parse_args(): rpn_file = os.path.join(output_dir, net.name + '_rpn_proposals.pkl') with open(rpn_file, 'wb') as f: cPickle.dump(imdb_boxes, f, cPickle.HIGHEST_PROTOCOL) - print 'Wrote RPN proposals to {}'.format(rpn_file) + print('Wrote RPN proposals to {}'.format(rpn_file)) diff --git a/tools/test_net.py b/tools/test_net.py index de4f12b41..a4dc8a19a 100755 --- a/tools/test_net.py +++ b/tools/test_net.py @@ -16,45 +16,68 @@ import caffe import argparse import 
pprint -import time, os, sys +import time +import os +import sys + def parse_args(): """ Parse input arguments """ parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') - parser.add_argument('--gpu', dest='gpu_id', help='GPU id to use', - default=0, type=int) - parser.add_argument('--def', dest='prototxt', - help='prototxt file defining the network', - default=None, type=str) - parser.add_argument('--net', dest='caffemodel', - help='model to test', - default=None, type=str) - parser.add_argument('--cfg', dest='cfg_file', - help='optional config file', default=None, type=str) - parser.add_argument('--wait', dest='wait', - help='wait until net file exists', - default=True, type=bool) - parser.add_argument('--imdb', dest='imdb_name', - help='dataset to test', - default='voc_2007_test', type=str) - parser.add_argument('--comp', dest='comp_mode', help='competition mode', - action='store_true') - parser.add_argument('--set', dest='set_cfgs', - help='set config keys', default=None, - nargs=argparse.REMAINDER) - parser.add_argument('--vis', dest='vis', help='visualize detections', - action='store_true') - parser.add_argument('--num_dets', dest='max_per_image', - help='max number of detections per image', - default=100, type=int) + parser.add_argument( + '--gpu', dest='gpu_id', help='GPU id to use', + default=0, type=int + ) + parser.add_argument( + '--def', dest='prototxt', + help='prototxt file defining the network', + default=None, type=str + ) + parser.add_argument( + '--net', dest='caffemodel', + help='model to test', + default=None, type=str + ) + parser.add_argument( + '--cfg', dest='cfg_file', + help='optional config file', default=None, type=str + ) + parser.add_argument( + '--wait', dest='wait', + help='wait until net file exists', + default=True, type=bool + ) + parser.add_argument( + '--imdb', dest='imdb_name', + help='dataset to test', + default='voc_2007_test', type=str + ) + parser.add_argument( + '--comp', dest='comp_mode', help='competition mode', + action='store_true') + parser.add_argument( + '--set', dest='set_cfgs', + help='set config keys', default=None, + nargs=argparse.REMAINDER + ) + parser.add_argument( + '--vis', dest='vis', help='visualize detections', + action='store_true' + ) + parser.add_argument( + '--num_dets', dest='max_per_image', + help='max number of detections per image', + default=100, type=int + ) if len(sys.argv) == 1: parser.print_help() sys.exit(1) args = parser.parse_args() + return args if __name__ == '__main__': @@ -84,6 +107,7 @@ def parse_args(): imdb = get_imdb(args.imdb_name) imdb.competition_mode(args.comp_mode) + if not cfg.TEST.HAS_RPN: imdb.set_proposal_method(cfg.TEST.PROPOSAL_METHOD) diff --git a/tools/train_faster_rcnn_alt_opt.py b/tools/train_faster_rcnn_alt_opt.py index e49844a45..9fa9d277d 100755 --- a/tools/train_faster_rcnn_alt_opt.py +++ b/tools/train_faster_rcnn_alt_opt.py @@ -7,7 +7,8 @@ # Written by Ross Girshick # -------------------------------------------------------- -"""Train a Faster R-CNN network using alternating optimization. +""" +Train a Faster R-CNN network using alternating optimization. This tool implements the alternating optimization algorithm described in our NIPS 2015 paper ("Faster R-CNN: Towards Real-time Object Detection with Region Proposal Networks." Shaoqing Ren, Kaiming He, Ross Girshick, Jian Sun.) 
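One pre-existing wart that the reformatting in rpn_generate.py and test_net.py preserves: argparse applies `type` to the raw string, and `bool('False')` is `True` because any non-empty string is truthy, so `--wait False` does not do what it appears to. A short demonstration:

    import argparse

    p = argparse.ArgumentParser()
    p.add_argument('--wait', dest='wait', default=True, type=bool)
    print(p.parse_args(['--wait', 'False']).wait)  # prints True

A boolean flag defined with `action='store_true'` (or `store_false`) avoids the surprise, at the cost of changing the default.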
@@ -21,67 +22,92 @@ import argparse import pprint import numpy as np -import sys, os +import sys +import os import multiprocessing as mp import cPickle import shutil + def parse_args(): """ Parse input arguments """ parser = argparse.ArgumentParser(description='Train a Faster R-CNN network') - parser.add_argument('--gpu', dest='gpu_id', - help='GPU device id to use [0]', - default=0, type=int) - parser.add_argument('--net_name', dest='net_name', - help='network name (e.g., "ZF")', - default=None, type=str) - parser.add_argument('--weights', dest='pretrained_model', - help='initialize with pretrained model weights', - default=None, type=str) - parser.add_argument('--cfg', dest='cfg_file', - help='optional config file', - default=None, type=str) - parser.add_argument('--imdb', dest='imdb_name', - help='dataset to train on', - default='voc_2007_trainval', type=str) - parser.add_argument('--set', dest='set_cfgs', - help='set config keys', default=None, - nargs=argparse.REMAINDER) + parser.add_argument( + '--gpu', dest='gpu_id', + help='GPU device id to use [0]', + default=0, type=int + ) + parser.add_argument( + '--net_name', dest='net_name', + help='network name (e.g., "ZF")', + default=None, type=str + ) + parser.add_argument( + '--weights', dest='pretrained_model', + help='initialize with pretrained model weights', + default=None, type=str + ) + parser.add_argument( + '--cfg', dest='cfg_file', + help='optional config file', + default=None, type=str + ) + parser.add_argument( + '--imdb', dest='imdb_name', + help='dataset to train on', + default='voc_2007_trainval', type=str + ) + parser.add_argument( + '--set', dest='set_cfgs', + help='set config keys', default=None, + nargs=argparse.REMAINDER + ) if len(sys.argv) == 1: parser.print_help() sys.exit(1) args = parser.parse_args() + return args + def get_roidb(imdb_name, rpn_file=None): imdb = get_imdb(imdb_name) - print 'Loaded dataset `{:s}` for training'.format(imdb.name) + print('Loaded dataset `{:s}` for training'.format(imdb.name)) imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD) - print 'Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD) + print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD)) + if rpn_file is not None: imdb.config['rpn_file'] = rpn_file + roidb = get_training_roidb(imdb) + return roidb, imdb + def get_solvers(net_name): # Faster R-CNN Alternating Optimization n = 'faster_rcnn_alt_opt' # Solver for each training stage - solvers = [[net_name, n, 'stage1_rpn_solver60k80k.pt'], - [net_name, n, 'stage1_fast_rcnn_solver30k40k.pt'], - [net_name, n, 'stage2_rpn_solver60k80k.pt'], - [net_name, n, 'stage2_fast_rcnn_solver30k40k.pt']] + solvers = [ + [net_name, n, 'stage1_rpn_solver60k80k.pt'], + [net_name, n, 'stage1_fast_rcnn_solver30k40k.pt'], + [net_name, n, 'stage2_rpn_solver60k80k.pt'], + [net_name, n, 'stage2_fast_rcnn_solver30k40k.pt'] + ] + solvers = [os.path.join(cfg.MODELS_DIR, *s) for s in solvers] # Iterations for each training stage max_iters = [80000, 40000, 80000, 40000] # max_iters = [100, 100, 100, 100] # Test prototxt for the RPN rpn_test_prototxt = os.path.join( - cfg.MODELS_DIR, net_name, n, 'rpn_test.pt') + cfg.MODELS_DIR, net_name, n, 'rpn_test.pt' + ) + return solvers, max_iters, rpn_test_prototxt # ------------------------------------------------------------------------------ @@ -90,6 +116,7 @@ def get_solvers(net_name): # stage is executed in a separate process using multiprocessing.Process. 
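As the comment above notes, each training stage runs in a child process so that GPU memory held by pycaffe is reliably released between stages. Reduced to a minimal sketch (the stage function and the artifact path are illustrative):

    import multiprocessing as mp

    def stage(queue=None):
        # ... train, then report the resulting artifact back to the parent
        queue.put({'model_path': '/tmp/stage_output.caffemodel'})

    if __name__ == '__main__':
        mp_queue = mp.Queue()
        p = mp.Process(target=stage, kwargs=dict(queue=mp_queue))
        p.start()
        stage_out = mp_queue.get()  # blocks until the child posts its result
        p.join()
        print(stage_out['model_path'])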
# ------------------------------------------------------------------------------ + def _init_caffe(cfg): """Initialize pycaffe in a training process. """ @@ -102,9 +129,11 @@ def _init_caffe(cfg): caffe.set_mode_gpu() caffe.set_device(cfg.GPU_ID) + def train_rpn(queue=None, imdb_name=None, init_model=None, solver=None, max_iters=None, cfg=None): - """Train a Region Proposal Network in a separate training process. + """ + Train a Region Proposal Network in a separate training process. """ # Not using any proposals, just ground-truth boxes @@ -112,7 +141,7 @@ def train_rpn(queue=None, imdb_name=None, init_model=None, solver=None, cfg.TRAIN.BBOX_REG = False # applies only to Fast R-CNN bbox regression cfg.TRAIN.PROPOSAL_METHOD = 'gt' cfg.TRAIN.IMS_PER_BATCH = 1 - print 'Init model: {}'.format(init_model) + print('Init model: {}'.format(init_model)) print('Using config:') pprint.pprint(cfg) @@ -120,28 +149,33 @@ def train_rpn(queue=None, imdb_name=None, init_model=None, solver=None, _init_caffe(cfg) roidb, imdb = get_roidb(imdb_name) - print 'roidb len: {}'.format(len(roidb)) + print('roidb len: {}'.format(len(roidb))) output_dir = get_output_dir(imdb) - print 'Output will be saved to `{:s}`'.format(output_dir) + print('Output will be saved to `{:s}`'.format(output_dir)) - model_paths = train_net(solver, roidb, output_dir, - pretrained_model=init_model, - max_iters=max_iters) + model_paths = train_net( + solver, roidb, output_dir, + pretrained_model=init_model, + max_iters=max_iters + ) # Cleanup all but the final model for i in model_paths[:-1]: os.remove(i) + rpn_model_path = model_paths[-1] # Send final model path through the multiprocessing queue queue.put({'model_path': rpn_model_path}) + def rpn_generate(queue=None, imdb_name=None, rpn_model_path=None, cfg=None, rpn_test_prototxt=None): - """Use a trained RPN to generate proposals. + """ + Use a trained RPN to generate proposals. """ cfg.TEST.RPN_PRE_NMS_TOP_N = -1 # no pre NMS filtering cfg.TEST.RPN_POST_NMS_TOP_N = 2000 # limit top boxes after NMS - print 'RPN model: {}'.format(rpn_model_path) + print('RPN model: {}'.format(rpn_model_path)) print('Using config:') pprint.pprint(cfg) @@ -152,34 +186,40 @@ def rpn_generate(queue=None, imdb_name=None, rpn_model_path=None, cfg=None, # We compute them on the image once and then flip the already computed # proposals. This might cause a minor loss in mAP (less proposal jittering). 
imdb = get_imdb(imdb_name) - print 'Loaded dataset `{:s}` for proposal generation'.format(imdb.name) + print('Loaded dataset `{:s}` for proposal generation'.format(imdb.name)) # Load RPN and configure output directory rpn_net = caffe.Net(rpn_test_prototxt, rpn_model_path, caffe.TEST) output_dir = get_output_dir(imdb) - print 'Output will be saved to `{:s}`'.format(output_dir) + print('Output will be saved to `{:s}`'.format(output_dir)) # Generate proposals on the imdb rpn_proposals = imdb_proposals(rpn_net, imdb) # Write proposals to disk and send the proposal file path through the # multiprocessing queue rpn_net_name = os.path.splitext(os.path.basename(rpn_model_path))[0] rpn_proposals_path = os.path.join( - output_dir, rpn_net_name + '_proposals.pkl') + output_dir, rpn_net_name + '_proposals.pkl' + ) + with open(rpn_proposals_path, 'wb') as f: cPickle.dump(rpn_proposals, f, cPickle.HIGHEST_PROTOCOL) - print 'Wrote RPN proposals to {}'.format(rpn_proposals_path) + + print('Wrote RPN proposals to {}'.format(rpn_proposals_path)) + queue.put({'proposal_path': rpn_proposals_path}) + def train_fast_rcnn(queue=None, imdb_name=None, init_model=None, solver=None, max_iters=None, cfg=None, rpn_file=None): - """Train a Fast R-CNN using proposals generated by an RPN. + """ + Train a Fast R-CNN using proposals generated by an RPN. """ cfg.TRAIN.HAS_RPN = False # not generating prosals on-the-fly cfg.TRAIN.PROPOSAL_METHOD = 'rpn' # use pre-computed RPN proposals instead cfg.TRAIN.IMS_PER_BATCH = 2 - print 'Init model: {}'.format(init_model) - print 'RPN proposals: {}'.format(rpn_file) + print('Init model: {}'.format(init_model)) + print('RPN proposals: {}'.format(rpn_file)) print('Using config:') pprint.pprint(cfg) @@ -188,14 +228,18 @@ def train_fast_rcnn(queue=None, imdb_name=None, init_model=None, solver=None, roidb, imdb = get_roidb(imdb_name, rpn_file=rpn_file) output_dir = get_output_dir(imdb) - print 'Output will be saved to `{:s}`'.format(output_dir) + print('Output will be saved to `{:s}`'.format(output_dir)) # Train Fast R-CNN - model_paths = train_net(solver, roidb, output_dir, - pretrained_model=init_model, - max_iters=max_iters) + model_paths = train_net( + solver, roidb, output_dir, + pretrained_model=init_model, + max_iters=max_iters + ) + # Cleanup all but the final model for i in model_paths[:-1]: os.remove(i) + fast_rcnn_model_path = model_paths[-1] # Send Fast R-CNN model path over the multiprocessing queue queue.put({'model_path': fast_rcnn_model_path}) @@ -224,101 +268,113 @@ def train_fast_rcnn(queue=None, imdb_name=None, init_model=None, solver=None, # solves, iters, etc. 
for each training stage solvers, max_iters, rpn_test_prototxt = get_solvers(args.net_name) - print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' - print 'Stage 1 RPN, init from ImageNet model' - print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' + print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') + print('Stage 1 RPN, init from ImageNet model') + print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') cfg.TRAIN.SNAPSHOT_INFIX = 'stage1' mp_kwargs = dict( - queue=mp_queue, - imdb_name=args.imdb_name, - init_model=args.pretrained_model, - solver=solvers[0], - max_iters=max_iters[0], - cfg=cfg) + queue=mp_queue, + imdb_name=args.imdb_name, + init_model=args.pretrained_model, + solver=solvers[0], + max_iters=max_iters[0], + cfg=cfg + ) + p = mp.Process(target=train_rpn, kwargs=mp_kwargs) p.start() rpn_stage1_out = mp_queue.get() p.join() - print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' - print 'Stage 1 RPN, generate proposals' - print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' + print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') + print('Stage 1 RPN, generate proposals') + print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') mp_kwargs = dict( - queue=mp_queue, - imdb_name=args.imdb_name, - rpn_model_path=str(rpn_stage1_out['model_path']), - cfg=cfg, - rpn_test_prototxt=rpn_test_prototxt) + queue=mp_queue, + imdb_name=args.imdb_name, + rpn_model_path=str(rpn_stage1_out['model_path']), + cfg=cfg, + rpn_test_prototxt=rpn_test_prototxt + ) + p = mp.Process(target=rpn_generate, kwargs=mp_kwargs) p.start() rpn_stage1_out['proposal_path'] = mp_queue.get()['proposal_path'] p.join() - print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' - print 'Stage 1 Fast R-CNN using RPN proposals, init from ImageNet model' - print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' + print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') + print('Stage 1 Fast R-CNN using RPN proposals, init from ImageNet model') + print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') cfg.TRAIN.SNAPSHOT_INFIX = 'stage1' mp_kwargs = dict( - queue=mp_queue, - imdb_name=args.imdb_name, - init_model=args.pretrained_model, - solver=solvers[1], - max_iters=max_iters[1], - cfg=cfg, - rpn_file=rpn_stage1_out['proposal_path']) + queue=mp_queue, + imdb_name=args.imdb_name, + init_model=args.pretrained_model, + solver=solvers[1], + max_iters=max_iters[1], + cfg=cfg, + rpn_file=rpn_stage1_out['proposal_path'] + ) + p = mp.Process(target=train_fast_rcnn, kwargs=mp_kwargs) p.start() fast_rcnn_stage1_out = mp_queue.get() p.join() - print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' - print 'Stage 2 RPN, init from stage 1 Fast R-CNN model' - print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' + print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') + print('Stage 2 RPN, init from stage 1 Fast R-CNN model') + print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') cfg.TRAIN.SNAPSHOT_INFIX = 'stage2' mp_kwargs = dict( - queue=mp_queue, - imdb_name=args.imdb_name, - init_model=str(fast_rcnn_stage1_out['model_path']), - solver=solvers[2], - max_iters=max_iters[2], - cfg=cfg) + queue=mp_queue, + imdb_name=args.imdb_name, + init_model=str(fast_rcnn_stage1_out['model_path']), + solver=solvers[2], + max_iters=max_iters[2], + cfg=cfg + ) + p = mp.Process(target=train_rpn, kwargs=mp_kwargs) p.start() rpn_stage2_out = 
     rpn_stage2_out = mp_queue.get()
     p.join()

-    print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
-    print 'Stage 2 RPN, generate proposals'
-    print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
+    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
+    print('Stage 2 RPN, generate proposals')
+    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
     mp_kwargs = dict(
-            queue=mp_queue,
-            imdb_name=args.imdb_name,
-            rpn_model_path=str(rpn_stage2_out['model_path']),
-            cfg=cfg,
-            rpn_test_prototxt=rpn_test_prototxt)
+        queue=mp_queue,
+        imdb_name=args.imdb_name,
+        rpn_model_path=str(rpn_stage2_out['model_path']),
+        cfg=cfg,
+        rpn_test_prototxt=rpn_test_prototxt
+    )
+
     p = mp.Process(target=rpn_generate, kwargs=mp_kwargs)
     p.start()
     rpn_stage2_out['proposal_path'] = mp_queue.get()['proposal_path']
     p.join()

-    print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
-    print 'Stage 2 Fast R-CNN, init from stage 2 RPN R-CNN model'
-    print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
+    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
+    print('Stage 2 Fast R-CNN, init from stage 2 RPN R-CNN model')
+    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
     cfg.TRAIN.SNAPSHOT_INFIX = 'stage2'
     mp_kwargs = dict(
-            queue=mp_queue,
-            imdb_name=args.imdb_name,
-            init_model=str(rpn_stage2_out['model_path']),
-            solver=solvers[3],
-            max_iters=max_iters[3],
-            cfg=cfg,
-            rpn_file=rpn_stage2_out['proposal_path'])
+        queue=mp_queue,
+        imdb_name=args.imdb_name,
+        init_model=str(rpn_stage2_out['model_path']),
+        solver=solvers[3],
+        max_iters=max_iters[3],
+        cfg=cfg,
+        rpn_file=rpn_stage2_out['proposal_path']
+    )
+
     p = mp.Process(target=train_fast_rcnn, kwargs=mp_kwargs)
     p.start()
     fast_rcnn_stage2_out = mp_queue.get()
@@ -326,9 +382,15 @@ def train_fast_rcnn(queue=None, imdb_name=None, init_model=None, solver=None,
     # Create final model (just a copy of the last stage)
     final_path = os.path.join(
-        os.path.dirname(fast_rcnn_stage2_out['model_path']),
-        args.net_name + '_faster_rcnn_final.caffemodel')
-    print 'cp {} -> {}'.format(
-        fast_rcnn_stage2_out['model_path'], final_path)
+        os.path.dirname(fast_rcnn_stage2_out['model_path']),
+        args.net_name + '_faster_rcnn_final.caffemodel'
+    )
+
+    print('cp {} -> {}'.format(
+        fast_rcnn_stage2_out['model_path'],
+        final_path
+    )
+    )
+
     shutil.copy(fast_rcnn_stage2_out['model_path'], final_path)
-    print 'Final model: {}'.format(final_path)
+    print('Final model: {}'.format(final_path))
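Each stage above repeats one choreography: bundle the stage's arguments into `mp_kwargs`, run the stage in its own `multiprocessing.Process`, and read the result off `mp_queue` before joining — reportedly because pycaffe does not reliably free GPU memory when instantiated nets are discarded, so letting each stage's process exit is the dependable cleanup. A stripped-down sketch of that pattern (the stage function and output path here are placeholders; no Caffe involved):

```python
import multiprocessing as mp


def run_stage(queue=None, stage_name=None):
    # A real stage would train a net and snapshot it to disk here; every
    # allocation it makes is reclaimed when this process exits.
    queue.put({'model_path': '/tmp/{}.caffemodel'.format(stage_name)})


if __name__ == '__main__':
    mp_queue = mp.Queue()
    p = mp.Process(target=run_stage,
                   kwargs=dict(queue=mp_queue, stage_name='stage1_rpn'))
    p.start()
    out = mp_queue.get()  # drain the queue *before* join(): a child still
    p.join()              # flushing a large queued item would never exit
    print(out['model_path'])
```

Note that the script always calls `mp_queue.get()` before `p.join()`, in the same order as the sketch; reversing the two can deadlock when the child's queue feeder thread is still flushing data.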
diff --git a/tools/train_net.py b/tools/train_net.py
index 622a95d68..7f9c2c0a6 100755
--- a/tools/train_net.py
+++ b/tools/train_net.py
@@ -20,50 +20,71 @@
 import numpy as np
 import sys

+
 def parse_args():
     """
     Parse input arguments
     """
     parser = argparse.ArgumentParser(description='Train a Fast R-CNN network')
-    parser.add_argument('--gpu', dest='gpu_id',
-                        help='GPU device id to use [0]',
-                        default=0, type=int)
-    parser.add_argument('--solver', dest='solver',
-                        help='solver prototxt',
-                        default=None, type=str)
-    parser.add_argument('--iters', dest='max_iters',
-                        help='number of iterations to train',
-                        default=40000, type=int)
-    parser.add_argument('--weights', dest='pretrained_model',
-                        help='initialize with pretrained model weights',
-                        default=None, type=str)
-    parser.add_argument('--cfg', dest='cfg_file',
-                        help='optional config file',
-                        default=None, type=str)
-    parser.add_argument('--imdb', dest='imdb_name',
-                        help='dataset to train on',
-                        default='voc_2007_trainval', type=str)
-    parser.add_argument('--rand', dest='randomize',
-                        help='randomize (do not use a fixed seed)',
-                        action='store_true')
-    parser.add_argument('--set', dest='set_cfgs',
-                        help='set config keys', default=None,
-                        nargs=argparse.REMAINDER)
+    parser.add_argument(
+        '--gpu', dest='gpu_id',
+        help='GPU device id to use [0]',
+        default=0, type=int
+    )
+    parser.add_argument(
+        '--solver', dest='solver',
+        help='solver prototxt',
+        default=None, type=str
+    )
+    parser.add_argument(
+        '--iters', dest='max_iters',
+        help='number of iterations to train',
+        default=40000, type=int
+    )
+    parser.add_argument(
+        '--weights', dest='pretrained_model',
+        help='initialize with pretrained model weights',
+        default=None, type=str
+    )
+    parser.add_argument(
+        '--cfg', dest='cfg_file',
+        help='optional config file',
+        default=None, type=str
+    )
+    parser.add_argument(
+        '--imdb', dest='imdb_name',
+        help='dataset to train on',
+        default='voc_2007_trainval', type=str
+    )
+    parser.add_argument(
+        '--rand', dest='randomize',
+        help='randomize (do not use a fixed seed)',
+        action='store_true'
+    )
+    parser.add_argument(
+        '--set', dest='set_cfgs',
+        help='set config keys', default=None,
+        nargs=argparse.REMAINDER
+    )

     if len(sys.argv) == 1:
         parser.print_help()
         sys.exit(1)

     args = parser.parse_args()
+
     return args

+
 def combined_roidb(imdb_names):
+
     def get_roidb(imdb_name):
         imdb = get_imdb(imdb_name)
-        print 'Loaded dataset `{:s}` for training'.format(imdb.name)
+        print('Loaded dataset `{:s}` for training'.format(imdb.name))
         imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
-        print 'Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD)
+        print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD))
         roidb = get_training_roidb(imdb)
+
         return roidb

     roidbs = [get_roidb(s) for s in imdb_names.split('+')]
@@ -74,6 +95,7 @@ def get_roidb(imdb_name):
         imdb = datasets.imdb.imdb(imdb_names)
     else:
         imdb = get_imdb(imdb_names)
+
     return imdb, roidb

 if __name__ == '__main__':
@@ -102,11 +124,13 @@ def get_roidb(imdb_name):
         caffe.set_device(args.gpu_id)

     imdb, roidb = combined_roidb(args.imdb_name)
-    print '{:d} roidb entries'.format(len(roidb))
+    print('{:d} roidb entries'.format(len(roidb)))

     output_dir = get_output_dir(imdb)
-    print 'Output will be saved to `{:s}`'.format(output_dir)
+    print('Output will be saved to `{:s}`'.format(output_dir))

-    train_net(args.solver, roidb, output_dir,
-              pretrained_model=args.pretrained_model,
-              max_iters=args.max_iters)
+    train_net(
+        args.solver, roidb, output_dir,
+        pretrained_model=args.pretrained_model,
+        max_iters=args.max_iters
+    )
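A small convention in `combined_roidb` above is easy to miss: `--imdb` accepts several dataset names joined by `+`, each component roidb is loaded separately and appended onto the first, and the composite is wrapped in a synthetic `datasets.imdb.imdb`. A self-contained sketch of that union logic — the dataset names are illustrative and `get_roidb` is a stand-in for the real loader:

```python
# Stand-in loader: the real get_roidb resolves a registered dataset name
# and returns its list of roidb entries.
def get_roidb(name):
    return [{'image': '{}_img_{:06d}.jpg'.format(name, i)} for i in range(2)]

imdb_names = 'voc_2007_trainval+voc_2012_trainval'  # illustrative names
roidbs = [get_roidb(s) for s in imdb_names.split('+')]

roidb = roidbs[0]
for r in roidbs[1:]:
    roidb.extend(r)  # later datasets are appended onto the first

print('{:d} roidb entries from {:d} datasets'.format(len(roidb), len(roidbs)))
```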
diff --git a/tools/train_svms.py b/tools/train_svms.py
index 498bbf2a2..65832c194 100755
--- a/tools/train_svms.py
+++ b/tools/train_svms.py
@@ -24,7 +24,9 @@
 import numpy.random as npr
 import cv2
 from sklearn import svm
-import os, sys
+import os
+import sys
+

 class SVMTrainer(object):
     """
@@ -47,44 +49,55 @@ def __init__(self, net, imdb):
                          for cls in imdb.classes]

     def _get_feature_scale(self, num_images=100):
-        TARGET_NORM = 20.0 # Magic value from traditional R-CNN
+        TARGET_NORM = 20.0  # Magic value from traditional R-CNN
         _t = Timer()
         roidb = self.imdb.roidb
         total_norm = 0.0
         count = 0.0
-        inds = npr.choice(xrange(self.imdb.num_images), size=num_images,
-                          replace=False)
+        inds = npr.choice(
+            range(self.imdb.num_images), size=num_images,
+            replace=False
+        )
+
         for i_, i in enumerate(inds):
             im = cv2.imread(self.imdb.image_path_at(i))
+
             if roidb[i]['flipped']:
                 im = im[:, ::-1, :]
+
             _t.tic()
             scores, boxes = im_detect(self.net, im, roidb[i]['boxes'])
             _t.toc()
             feat = self.net.blobs[self.layer].data
             total_norm += np.sqrt((feat ** 2).sum(axis=1)).sum()
             count += feat.shape[0]
-            print('{}/{}: avg feature norm: {:.3f}'.format(i_ + 1, num_images,
-                                                           total_norm / count))
+            print('{}/{}: avg feature norm: {:.3f}'.format(
+                i_ + 1, num_images,
+                total_norm / count
+            )
+            )

         return TARGET_NORM * 1.0 / (total_norm / count)

     def _get_pos_counts(self):
         counts = np.zeros((len(self.imdb.classes)), dtype=np.int)
         roidb = self.imdb.roidb
+
         for i in xrange(len(roidb)):
             for j in xrange(1, self.imdb.num_classes):
                 I = np.where(roidb[i]['gt_classes'] == j)[0]
                 counts[j] += len(I)

         for j in xrange(1, self.imdb.num_classes):
-            print('class {:s} has {:d} positives'.
-                  format(self.imdb.classes[j], counts[j]))
+            print('class {:s} has {:d} positives'.format(
+                self.imdb.classes[j], counts[j]
+            ))

         return counts

     def get_pos_examples(self):
         counts = self._get_pos_counts()
+
         for i in xrange(len(counts)):
             self.trainers[i].alloc_pos(counts[i])

@@ -94,22 +107,26 @@ def get_pos_examples(self):
         # num_images = 100
         for i in xrange(num_images):
             im = cv2.imread(self.imdb.image_path_at(i))
+
             if roidb[i]['flipped']:
                 im = im[:, ::-1, :]
+
             gt_inds = np.where(roidb[i]['gt_classes'] > 0)[0]
             gt_boxes = roidb[i]['boxes'][gt_inds]
             _t.tic()
             scores, boxes = im_detect(self.net, im, gt_boxes)
             _t.toc()
             feat = self.net.blobs[self.layer].data
+
             for j in xrange(1, self.imdb.num_classes):
                 cls_inds = np.where(roidb[i]['gt_classes'][gt_inds] == j)[0]
                 if len(cls_inds) > 0:
                     cls_feat = feat[cls_inds, :]
                     self.trainers[j].append_pos(cls_feat)
-            print 'get_pos_examples: {:d}/{:d} {:.3f}s' \
-                .format(i + 1, len(roidb), _t.average_time)
+
+            print('get_pos_examples: {:d}/{:d} {:.3f}s'.format(
+                i + 1, len(roidb), _t.average_time
+            ))

     def initialize_net(self):
         # Start all SVM parameters at zero
@@ -119,14 +136,14 @@ def initialize_net(self):
         # Initialize SVMs in a smart way. Not doing this because it's such
         # a good initialization that we might not learn something close to
         # the SVM solution.
-# # subtract background weights and biases for the foreground classes
-# w_bg = self.net.params['cls_score'][0].data[0, :]
-# b_bg = self.net.params['cls_score'][1].data[0]
-# self.net.params['cls_score'][0].data[1:, :] -= w_bg
-# self.net.params['cls_score'][1].data[1:] -= b_bg
-# # set the background weights and biases to 0 (where they shall remain)
-# self.net.params['cls_score'][0].data[0, :] = 0
-# self.net.params['cls_score'][1].data[0] = 0
+        # # subtract background weights and biases for the foreground classes
+        # w_bg = self.net.params['cls_score'][0].data[0, :]
+        # b_bg = self.net.params['cls_score'][1].data[0]
+        # self.net.params['cls_score'][0].data[1:, :] -= w_bg
+        # self.net.params['cls_score'][1].data[1:] -= b_bg
+        # # set the background weights and biases to 0 (where they shall remain)
+        # self.net.params['cls_score'][0].data[0, :] = 0
+        # self.net.params['cls_score'][1].data[0] = 0

     def update_net(self, cls_ind, w, b):
         self.net.params['cls_score'][0].data[cls_ind, :] = w
@@ -137,29 +154,35 @@ def train_with_hard_negatives(self):
         roidb = self.imdb.roidb
         num_images = len(roidb)
         # num_images = 100
+
         for i in xrange(num_images):
             im = cv2.imread(self.imdb.image_path_at(i))
+
             if roidb[i]['flipped']:
                 im = im[:, ::-1, :]
+
             _t.tic()
             scores, boxes = im_detect(self.net, im, roidb[i]['boxes'])
             _t.toc()
             feat = self.net.blobs[self.layer].data
             for j in xrange(1, self.imdb.num_classes):
-                hard_inds = \
-                    np.where((scores[:, j] > self.hard_thresh) &
-                             (roidb[i]['gt_overlaps'][:, j].toarray().ravel() <
-                              self.neg_iou_thresh))[0]
+                hard_inds = np.where(
+                    (scores[:, j] > self.hard_thresh) &
+                    (roidb[i]['gt_overlaps'][:, j].toarray().ravel() < self.neg_iou_thresh)
+                )[0]
+
                if len(hard_inds) > 0:
                     hard_feat = feat[hard_inds, :].copy()
-                    new_w_b = \
-                        self.trainers[j].append_neg_and_retrain(feat=hard_feat)
+                    new_w_b = self.trainers[j].append_neg_and_retrain(
+                        feat=hard_feat
+                    )
+
                     if new_w_b is not None:
                         self.update_net(j, new_w_b[0], new_w_b[1])

-            print(('train_with_hard_negatives: '
-                   '{:d}/{:d} {:.3f}s').format(i + 1, len(roidb),
-                                               _t.average_time))
+            print('train_with_hard_negatives: {:d}/{:d} {:.3f}s'.format(
+                i + 1, len(roidb), _t.average_time
+            ))
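`train_with_hard_negatives` above encodes the classic R-CNN mining rule: a proposal becomes a hard negative for class `j` when the current SVM scores it above `hard_thresh` while its best overlap with ground truth stays below `neg_iou_thresh`, and once enough such negatives accumulate the class trainer retrains. The selection step in isolation, with synthetic scores and overlaps (threshold values are illustrative):

```python
import numpy as np

hard_thresh = 0.0     # SVM score above which a box is a candidate negative...
neg_iou_thresh = 0.3  # ...provided its best IoU with ground truth stays below this

# Synthetic per-box SVM scores and max-IoU-with-ground-truth values.
scores = np.array([0.8, -0.5, 0.3, 0.9])
overlaps = np.array([0.1, 0.0, 0.6, 0.2])

# A hard negative fires confidently (high score) on something that is not
# actually the object (low overlap with every ground-truth box).
hard_inds = np.where((scores > hard_thresh) & (overlaps < neg_iou_thresh))[0]
print(hard_inds)  # -> [0 3]
```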
@@ -189,6 +212,7 @@ def train(self):
                 new_w_b = self.trainers[j].append_neg_and_retrain(force=True)
                 self.update_net(j, new_w_b[0], new_w_b[1])

+
 class SVMClassTrainer(object):
     """Manages post-hoc SVM training for a single object class."""

@@ -202,10 +226,12 @@ def __init__(self, cls, dim, feature_scale=1.0,
         self.pos_weight = pos_weight
         self.dim = dim
         self.feature_scale = feature_scale
-        self.svm = svm.LinearSVC(C=C, class_weight={1: 2, -1: 1},
-                                 intercept_scaling=B, verbose=1,
-                                 penalty='l2', loss='l1',
-                                 random_state=cfg.RNG_SEED, dual=True)
+        self.svm = svm.LinearSVC(
+            C=C, class_weight={1: 2, -1: 1},
+            intercept_scaling=B, verbose=1,
+            penalty='l2', loss='l1',
+            random_state=cfg.RNG_SEED, dual=True
+        )
         self.pos_cur = 0
         self.num_neg_added = 0
         self.retrain_limit = 2000
@@ -223,13 +249,19 @@ def append_pos(self, feat):

     def train(self):
         print('>>> Updating {} detector <<<'.format(self.cls))
+
         num_pos = self.pos.shape[0]
         num_neg = self.neg.shape[0]
-        print('Cache holds {} pos examples and {} neg examples'.
-              format(num_pos, num_neg))
+
+        print('Cache holds {} pos examples and {} neg examples'.format(
+            num_pos, num_neg
+        ))
+
         X = np.vstack((self.pos, self.neg)) * self.feature_scale
-        y = np.hstack((np.ones(num_pos),
-                       -np.ones(num_neg)))
+        y = np.hstack(
+            (np.ones(num_pos), -np.ones(num_neg))
+        )
+
         self.svm.fit(X, y)
         w = self.svm.coef_
         b = self.svm.intercept_[0]
@@ -251,8 +283,10 @@ def train(self):
         # Sanity check
         scores_ret = (
             X * 1.0 / self.feature_scale).dot(w.T * self.feature_scale) + b
-        assert np.allclose(scores, scores_ret[:, 0], atol=1e-5), \
-            "Scores from returned model don't match decision function"
+        assert np.allclose(scores, scores_ret[:, 0], atol=1e-5), (
+            "Scores from returned model don't match decision function"
+        )

         return ((w * self.feature_scale, b), pos_scores, neg_scores)
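The sanity-check hunk above is worth pausing on: the submitted version wrapped the `assert` condition and message in a single pair of parentheses, i.e. `assert(cond, msg)`. That expression is a two-element tuple, and a non-empty tuple is always truthy, so the assertion could never fail; CPython even emits `SyntaxWarning: assertion is always true` for it. The corrected hunk keeps condition and message as separate operands. A two-line demonstration:

```python
# Broken: the parenthesized pair is a tuple, which is truthy, so this passes.
assert (1 == 2, "never raised")

# Correct: condition and message are separate operands, so this raises
# AssertionError with the message attached.
assert 1 == 2, "scores do not match"
```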
@@ -261,15 +295,18 @@ def append_neg_and_retrain(self, feat=None, force=False):
             num = feat.shape[0]
             self.neg = np.vstack((self.neg, feat))
             self.num_neg_added += num
+
             if self.num_neg_added > self.retrain_limit or force:
                 self.num_neg_added = 0
                 new_w_b, pos_scores, neg_scores = self.train()
                 # scores = np.dot(self.neg, new_w_b[0].T) + new_w_b[1]
                 # easy_inds = np.where(neg_scores < self.evict_thresh)[0]
                 not_easy_inds = np.where(neg_scores >= self.evict_thresh)[0]
+
                 if len(not_easy_inds) > 0:
                     self.neg = self.neg[not_easy_inds, :]
                     # self.neg = np.delete(self.neg, easy_inds)
+
                 print('    Pruning easy negatives')
                 print('    Cache holds {} pos examples and {} neg examples'.
                       format(self.pos.shape[0], self.neg.shape[0]))
@@ -279,30 +316,42 @@ def append_neg_and_retrain(self, feat=None, force=False):
         else:
             return None

+
 def parse_args():
     """
     Parse input arguments
     """
     parser = argparse.ArgumentParser(description='Train SVMs (old skool)')
-    parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
-                        default=0, type=int)
-    parser.add_argument('--def', dest='prototxt',
-                        help='prototxt file defining the network',
-                        default=None, type=str)
-    parser.add_argument('--net', dest='caffemodel',
-                        help='model to test',
-                        default=None, type=str)
-    parser.add_argument('--cfg', dest='cfg_file',
-                        help='optional config file', default=None, type=str)
-    parser.add_argument('--imdb', dest='imdb_name',
-                        help='dataset to train on',
-                        default='voc_2007_trainval', type=str)
+    parser.add_argument(
+        '--gpu', dest='gpu_id', help='GPU device id to use [0]',
+        default=0, type=int
+    )
+    parser.add_argument(
+        '--def', dest='prototxt',
+        help='prototxt file defining the network',
+        default=None, type=str
+    )
+    parser.add_argument(
+        '--net', dest='caffemodel',
+        help='model to test',
+        default=None, type=str
+    )
+    parser.add_argument(
+        '--cfg', dest='cfg_file',
+        help='optional config file', default=None, type=str
+    )
+    parser.add_argument(
+        '--imdb', dest='imdb_name',
+        help='dataset to train on',
+        default='voc_2007_trainval', type=str
+    )

     if len(sys.argv) == 1:
         parser.print_help()
         sys.exit(1)

     args = parser.parse_args()
+
     return args

 if __name__ == '__main__':
@@ -332,22 +381,23 @@ def parse_args():
         caffe.set_mode_gpu()
     if args.gpu_id is not None:
         caffe.set_device(args.gpu_id)
+
     net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST)
     net.name = os.path.splitext(os.path.basename(args.caffemodel))[0]
     out = os.path.splitext(os.path.basename(args.caffemodel))[0] + '_svm'
     out_dir = os.path.dirname(args.caffemodel)

     imdb = get_imdb(args.imdb_name)
-    print 'Loaded dataset `{:s}` for training'.format(imdb.name)
+    print('Loaded dataset `{:s}` for training'.format(imdb.name))

     # enhance roidb to contain flipped examples
     if cfg.TRAIN.USE_FLIPPED:
-        print 'Appending horizontally-flipped training examples...'
+        print('Appending horizontally-flipped training examples...')
         imdb.append_flipped_images()
-        print 'done'
+        print('done')

     SVMTrainer(net, imdb).train()

     filename = '{}/{}.caffemodel'.format(out_dir, out)
     net.save(filename)
-    print 'Wrote svm model to: {:s}'.format(filename)
+    print('Wrote svm model to: {:s}'.format(filename))

From 31035e0bd0b7feb725c99dac616d6bca25f8be63 Mon Sep 17 00:00:00 2001
From: Hugo Serrat
Date: Wed, 18 May 2016 17:25:17 +0200
Subject: [PATCH 2/3] Changed caffe repo

---
 .gitmodules     |  3 +++
 README.md       | 15 ++++++++++++---
 caffe           |  1 +
 caffe-fast-rcnn |  1 -
 4 files changed, 16 insertions(+), 4 deletions(-)
 create mode 160000 caffe
 delete mode 160000 caffe-fast-rcnn

diff --git a/.gitmodules b/.gitmodules
index 28e6d5be2..f23e2c031 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -2,3 +2,6 @@
 	path = caffe-fast-rcnn
 	url = https://github.com/rbgirshick/caffe-fast-rcnn.git
 	branch = fast-rcnn
+[submodule "caffe"]
+	path = caffe
+	url = https://github.com/Austriker/caffe.git
diff --git a/README.md b/README.md
index 0276a3ad2..6ee238fae 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,12 @@
+# *Faster* R-CNN Fork
+
+#### Warning
+This fork is still a work in progress.
+
+This fork:
+ - merges the [caffe-fast-rcnn](https://github.com/rbgirshick/caffe-fast-rcnn/tree/0dcd397b29507b8314e252e850518c5695efbb83) fork into the current Caffe version; the [PR](https://github.com/BVLC/caffe/pull/4163) is waiting to be merged
+ - adds support for Python 3.4
+
 ### Disclaimer

 The official Faster R-CNN code (written in MATLAB) is available [here](https://github.com/ShaoqingRen/faster_rcnn).
@@ -74,7 +83,7 @@ If you find Faster R-CNN useful in your research, please consider citing:
 1. Clone the Faster R-CNN repository
   ```Shell
   # Make sure to clone with --recursive
-  git clone --recursive https://github.com/rbgirshick/py-faster-rcnn.git
+  git clone --recursive https://github.com/Austriker/py-faster-rcnn.git
   ```

 2. We'll call the directory that you cloned Faster R-CNN into `FRCN_ROOT`
@@ -85,7 +94,7 @@ If you find Faster R-CNN useful in your research, please consider citing:
   ```Shell
   git submodule update --init --recursive
   ```
-  **Note 2:** The `caffe-fast-rcnn` submodule needs to be on the `faster-rcnn` branch (or equivalent detached state). This will happen automatically *if you followed step 1 instructions*.
+  **Note 2:** The `caffe` submodule needs to be on the `fast-rcnn` branch (or equivalent detached state). This will happen automatically *if you followed step 1 instructions*.

 3. Build the Cython modules
   ```Shell
   cd $FRCN_ROOT/lib
   make
   ```
@@ -95,7 +104,7 @@ If you find Faster R-CNN useful in your research, please consider citing:
 4. Build Caffe and pycaffe
   ```Shell
-  cd $FRCN_ROOT/caffe-fast-rcnn
+  cd $FRCN_ROOT/caffe
   # Now follow the Caffe installation instructions here:
   #   http://caffe.berkeleyvision.org/installation.html
diff --git a/caffe b/caffe
new file mode 160000
index 000000000..c5f996b29
--- /dev/null
+++ b/caffe
@@ -0,0 +1 @@
+Subproject commit c5f996b290bfe45a78fd662cfa24a61e89f58fd3
diff --git a/caffe-fast-rcnn b/caffe-fast-rcnn
deleted file mode 160000
index 0dcd397b2..000000000
--- a/caffe-fast-rcnn
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 0dcd397b29507b8314e252e850518c5695efbb83

From ff4172b4bfcb3bff482b30142ccecb2e7cd174f1 Mon Sep 17 00:00:00 2001
From: Hugo Serrat
Date: Wed, 18 May 2016 17:32:26 +0200
Subject: [PATCH 3/3] Removed submodule

---
 .gitmodules | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index f23e2c031..70bee79df 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,7 +1,3 @@
-[submodule "caffe-fast-rcnn"]
-	path = caffe-fast-rcnn
-	url = https://github.com/rbgirshick/caffe-fast-rcnn.git
-	branch = fast-rcnn
 [submodule "caffe"]
 	path = caffe
 	url = https://github.com/Austriker/caffe.git
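One loose end runs through the whole series: several hunks still call `cPickle` and `xrange` (for example in `rpn_generate` and in `train_svms.py`), and neither name exists under Python 3. A minimal compatibility shim in the spirit of this port — a sketch only, assuming the `six` package is installed — keeps one spelling working on both interpreters:

```python
from __future__ import print_function

# six.moves aliases the 2.x and 3.x variants under one import (assumption:
# six is available; it is not a dependency declared by this patch series).
from six.moves import cPickle, range

data = {'proposal_path': '/tmp/example_proposals.pkl'}  # placeholder payload

with open('/tmp/example.pkl', 'wb') as f:
    cPickle.dump(data, f, cPickle.HIGHEST_PROTOCOL)

for i in range(3):  # xrange semantics on Python 2, builtin range on Python 3
    print(i)
```

Importing these names once per module, or converting the remaining call sites outright, would let the same files run unmodified under both 2.7 and 3.4.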