From f1bba4516aa1a70e91c18712c4727f9ae100a0f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20Sch=C3=A4fer?= Date: Tue, 5 Mar 2019 15:56:32 +0100 Subject: [PATCH 01/17] Fix README.md citation typo (#525) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4fb4959b3..01a81d812 100644 --- a/README.md +++ b/README.md @@ -221,7 +221,7 @@ Please consider citing this project in your publications if it helps your resear ``` @misc{massa2018mrcnn, author = {Massa, Francisco and Girshick, Ross}, -title = {{maskrnn-benchmark: Fast, modular reference implementation of Instance Segmentation and Object Detection algorithms in PyTorch}}, +title = {{maskrcnn-benchmark: Fast, modular reference implementation of Instance Segmentation and Object Detection algorithms in PyTorch}}, year = {2018}, howpublished = {\url{https://github.com/facebookresearch/maskrcnn-benchmark}}, note = {Accessed: [Insert date here]} From de42d895ad7c2cee72259207fa9f2a7580c72cd0 Mon Sep 17 00:00:00 2001 From: Baptiste Metge Date: Tue, 5 Mar 2019 15:58:41 +0100 Subject: [PATCH 02/17] fix INSTALL.md (#510) * fix INSTALL.md * fix PR * Update INSTALL.md --- INSTALL.md | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 67c2dee60..caa1f3b78 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -23,19 +23,8 @@ conda activate maskrcnn_benchmark # this installs the right pip and dependencies for the fresh python conda install ipython -# maskrcnn_benchmark and coco api dependencies -pip install -r requirements.txt - -# follow PyTorch installation in https://pytorch.org/get-started/locally/ -# we give the instructions for CUDA 9.0 -conda install pytorch-nightly cudatoolkit=9.0 -c pytorch - export INSTALL_DIR=$PWD -# install torchvision -cd $INSTALL_DIR -git clone https://github.com/pytorch/vision.git -cd vision -python setup.py install + # install pycocotools cd $INSTALL_DIR @@ -47,12 +36,21 @@ python setup.py build_ext install cd $INSTALL_DIR git clone https://github.com/facebookresearch/maskrcnn-benchmark.git cd maskrcnn-benchmark + +# maskrcnn_benchmark and coco api dependencies +pip install -r requirements.txt + +# follow PyTorch installation in https://pytorch.org/get-started/locally/ +# we give the instructions for CUDA 9.0 +conda install -c pytorch pytorch torchvision cudatoolkit=9.0 + # the following will install the lib with # symbolic links, so that you can modify # the files if you want and won't need to # re-build it python setup.py build develop + unset INSTALL_DIR # or if you are on macOS From fd20472282a24dfe996a3055af81ae9b7910b6ac Mon Sep 17 00:00:00 2001 From: Francisco Massa Date: Thu, 7 Mar 2019 15:47:35 -0800 Subject: [PATCH 03/17] Update ImageList to work with 3d tensors (#543) --- maskrcnn_benchmark/structures/image_list.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/maskrcnn_benchmark/structures/image_list.py b/maskrcnn_benchmark/structures/image_list.py index 9a70418fb..590b87a65 100644 --- a/maskrcnn_benchmark/structures/image_list.py +++ b/maskrcnn_benchmark/structures/image_list.py @@ -41,6 +41,8 @@ def to_image_list(tensors, size_divisible=0): return tensors elif isinstance(tensors, torch.Tensor): # single tensor shape can be inferred + if tensors.dim() == 3: + tensors = tensors[None] assert tensors.dim() == 4 image_sizes = [tensor.shape[-2:] for tensor in tensors] return ImageList(tensors, image_sizes) From 464b1af167d3dcb513964e62389825ff507609c3 Mon Sep 17 00:00:00 2001 From: Stzpz 
Date: Thu, 7 Mar 2019 15:57:51 -0800 Subject: [PATCH 04/17] Fbnet benchmark (#507) * Added a timer to benchmark model inference time in addition to total runtime. * Updated FBNet configs and included some baselines benchmark results. * Added a unit test for detectors. * Add links to the models --- MODEL_ZOO.md | 21 +++ configs/e2e_faster_rcnn_fbnet.yaml | 2 +- configs/e2e_faster_rcnn_fbnet_600.yaml | 2 +- .../e2e_faster_rcnn_fbnet_chamv1a_600.yaml | 44 ++++++ configs/e2e_mask_rcnn_fbnet.yaml | 4 +- configs/e2e_mask_rcnn_fbnet_600.yaml | 52 +++++++ .../e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml | 2 +- ...2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml | 52 +++++++ maskrcnn_benchmark/engine/inference.py | 31 +++- maskrcnn_benchmark/modeling/backbone/fbnet.py | 7 - .../modeling/backbone/fbnet_builder.py | 3 + .../modeling/backbone/fbnet_modeldef.py | 89 ++++++++++- maskrcnn_benchmark/utils/timer.py | 46 ++++++ tests/test_detectors.py | 143 ++++++++++++++++++ 14 files changed, 475 insertions(+), 23 deletions(-) create mode 100755 configs/e2e_faster_rcnn_fbnet_chamv1a_600.yaml create mode 100755 configs/e2e_mask_rcnn_fbnet_600.yaml create mode 100755 configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml create mode 100755 maskrcnn_benchmark/utils/timer.py create mode 100644 tests/test_detectors.py diff --git a/MODEL_ZOO.md b/MODEL_ZOO.md index a88546734..a0276d3d5 100644 --- a/MODEL_ZOO.md +++ b/MODEL_ZOO.md @@ -33,7 +33,28 @@ backbone | type | lr sched | im / gpu | train mem(GB) | train time (s/iter) | to -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- R-50-FPN | Keypoint | 1x | 2 | 5.7 | 0.3771 | 9.4 | 0.10941 | 53.7 | 64.3 | 9981060 +### Light-weight Model baselines +We provide pre-trained models for selected FBNet models. +* All the models are trained from scratch with BN using the training schedule specified below. +* Evaluation is performed on a single NVIDIA V100 GPU with `MODEL.RPN.POST_NMS_TOP_N_TEST` set to `200`. + +The following inference times are reported: + * inference total batch=8: Total inference time including data loading, model inference and pre/post processing, using 8 images per batch. + * inference model batch=8: Model inference time only, using 8 images per batch. + * inference model batch=1: Model inference time only, using 1 image per batch. + * inference caffe2 batch=1: Model inference time for the model in Caffe2 format, using 1 image per batch. The Caffe2 models fuse BN into Conv and run purely on C++/CUDA, using Caffe2 ops for rpn/detection post-processing. + +The pre-trained models are available via the links in the model id column.
+ +backbone | type | resolution | lr sched | im / gpu | train mem(GB) | train time (s/iter) | total train time (hr) | inference total batch=8 (s/im) | inference model batch=8 (s/im) | inference model batch=1 (s/im) | inference caffe2 batch=1 (s/im) | box AP | mask AP | model id +-- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- +[R-50-C4](configs/e2e_faster_rcnn_R_50_C4_1x.yaml) (reference) | Fast | 800 | 1x | 1 | 5.8 | 0.4036 | 20.2 | 0.0875 | **0.0793** | 0.0831 | **0.0625** | 34.4 | - | f35857197 +[fbnet_chamv1a](configs/e2e_faster_rcnn_fbnet_chamv1a_600.yaml) | Fast | 600 | 0.75x | 12 | 13.6 | 0.5444 | 20.5 | 0.0315 | **0.0260** | 0.0376 | **0.0188** | 33.5 | - | [f100940543](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_fbnet_chamv1a_600.pth) +[fbnet_default](configs/e2e_faster_rcnn_fbnet_600.yaml) | Fast | 600 | 0.5x | 16 | 11.1 | 0.4872 | 12.5 | 0.0316 | **0.0250** | 0.0297 | **0.0130** | 28.2 | - | [f101086388](https://download.pytorch.org/models/maskrcnn/e2e_faster_rcnn_fbnet_600.pth) +[R-50-C4](configs/e2e_mask_rcnn_R_50_C4_1x.yaml) (reference) | Mask | 800 | 1x | 1 | 5.8 | 0.452 | 22.6 | 0.0918 | **0.0848** | 0.0844 | - | 35.2 | 31.0 | f35858791 +[fbnet_xirb16d](configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml) | Mask | 600 | 0.5x | 16 | 13.4 | 1.1732 | 29 | 0.0386 | **0.0319** | 0.0356 | - | 30.7 | 26.9 | [f101086394](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_fbnet_xirb16d_dsmask.pth) +[fbnet_default](configs/e2e_mask_rcnn_fbnet_600.yaml) | Mask | 600 | 0.5x | 16 | 13.0 | 0.9036 | 23.0 | 0.0327 | **0.0269** | 0.0385 | - | 29.0 | 26.1 | [f101086385](https://download.pytorch.org/models/maskrcnn/e2e_mask_rcnn_fbnet_600.pth) ## Comparison with Detectron and mmdetection diff --git a/configs/e2e_faster_rcnn_fbnet.yaml b/configs/e2e_faster_rcnn_fbnet.yaml index eed79ac83..bc0ba35fc 100755 --- a/configs/e2e_faster_rcnn_fbnet.yaml +++ b/configs/e2e_faster_rcnn_fbnet.yaml @@ -15,7 +15,7 @@ MODEL: PRE_NMS_TOP_N_TRAIN: 6000 PRE_NMS_TOP_N_TEST: 6000 POST_NMS_TOP_N_TRAIN: 2000 - POST_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 100 RPN_HEAD: FBNet.rpn_head ROI_HEADS: BATCH_SIZE_PER_IMAGE: 512 diff --git a/configs/e2e_faster_rcnn_fbnet_600.yaml b/configs/e2e_faster_rcnn_fbnet_600.yaml index cd359b65c..9d0381ef6 100755 --- a/configs/e2e_faster_rcnn_fbnet_600.yaml +++ b/configs/e2e_faster_rcnn_fbnet_600.yaml @@ -15,7 +15,7 @@ MODEL: PRE_NMS_TOP_N_TRAIN: 6000 PRE_NMS_TOP_N_TEST: 6000 POST_NMS_TOP_N_TRAIN: 2000 - POST_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 200 RPN_HEAD: FBNet.rpn_head ROI_HEADS: BATCH_SIZE_PER_IMAGE: 256 diff --git a/configs/e2e_faster_rcnn_fbnet_chamv1a_600.yaml b/configs/e2e_faster_rcnn_fbnet_chamv1a_600.yaml new file mode 100755 index 000000000..91e282778 --- /dev/null +++ b/configs/e2e_faster_rcnn_fbnet_chamv1a_600.yaml @@ -0,0 +1,44 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + BACKBONE: + CONV_BODY: FBNet + FBNET: + ARCH: "cham_v1a" + BN_TYPE: "bn" + WIDTH_DIVISOR: 8 + DW_CONV_SKIP_BN: True + DW_CONV_SKIP_RELU: True + RPN: + ANCHOR_SIZES: (32, 64, 128, 256, 512) + ANCHOR_STRIDE: (16, ) + BATCH_SIZE_PER_IMAGE: 256 + PRE_NMS_TOP_N_TRAIN: 6000 + PRE_NMS_TOP_N_TEST: 6000 + POST_NMS_TOP_N_TRAIN: 2000 + POST_NMS_TOP_N_TEST: 200 + RPN_HEAD: FBNet.rpn_head + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 128 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 6 + FEATURE_EXTRACTOR: FBNet.roi_head + NUM_CLASSES: 81 +DATASETS: + TRAIN: ("coco_2014_train", "coco_2014_valminusminival") + TEST: ("coco_2014_minival",) +SOLVER: + BASE_LR: 0.045 + 
WARMUP_FACTOR: 0.1 + WEIGHT_DECAY: 0.0001 + STEPS: (90000, 120000) + MAX_ITER: 135000 + IMS_PER_BATCH: 96 # for 8GPUs +# TEST: +# IMS_PER_BATCH: 8 +INPUT: + MIN_SIZE_TRAIN: (600, ) + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 600 + MAX_SIZE_TEST: 1000 + PIXEL_MEAN: [103.53, 116.28, 123.675] + PIXEL_STD: [57.375, 57.12, 58.395] diff --git a/configs/e2e_mask_rcnn_fbnet.yaml b/configs/e2e_mask_rcnn_fbnet.yaml index 94605dc29..308bdad72 100755 --- a/configs/e2e_mask_rcnn_fbnet.yaml +++ b/configs/e2e_mask_rcnn_fbnet.yaml @@ -8,7 +8,7 @@ MODEL: WIDTH_DIVISOR: 8 DW_CONV_SKIP_BN: True DW_CONV_SKIP_RELU: True - DET_HEAD_LAST_SCALE: -1.0 + DET_HEAD_LAST_SCALE: 0.0 RPN: ANCHOR_SIZES: (16, 32, 64, 128, 256) ANCHOR_STRIDE: (16, ) @@ -16,7 +16,7 @@ MODEL: PRE_NMS_TOP_N_TRAIN: 6000 PRE_NMS_TOP_N_TEST: 6000 POST_NMS_TOP_N_TRAIN: 2000 - POST_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 100 RPN_HEAD: FBNet.rpn_head ROI_HEADS: BATCH_SIZE_PER_IMAGE: 256 diff --git a/configs/e2e_mask_rcnn_fbnet_600.yaml b/configs/e2e_mask_rcnn_fbnet_600.yaml new file mode 100755 index 000000000..8ec0c2f8a --- /dev/null +++ b/configs/e2e_mask_rcnn_fbnet_600.yaml @@ -0,0 +1,52 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + BACKBONE: + CONV_BODY: FBNet + FBNET: + ARCH: "default" + BN_TYPE: "bn" + WIDTH_DIVISOR: 8 + DW_CONV_SKIP_BN: True + DW_CONV_SKIP_RELU: True + DET_HEAD_LAST_SCALE: 0.0 + RPN: + ANCHOR_SIZES: (32, 64, 128, 256, 512) + ANCHOR_STRIDE: (16, ) + BATCH_SIZE_PER_IMAGE: 256 + PRE_NMS_TOP_N_TRAIN: 6000 + PRE_NMS_TOP_N_TEST: 6000 + POST_NMS_TOP_N_TRAIN: 2000 + POST_NMS_TOP_N_TEST: 200 + RPN_HEAD: FBNet.rpn_head + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 6 + FEATURE_EXTRACTOR: FBNet.roi_head + NUM_CLASSES: 81 + ROI_MASK_HEAD: + POOLER_RESOLUTION: 6 + FEATURE_EXTRACTOR: FBNet.roi_head_mask + PREDICTOR: "MaskRCNNConv1x1Predictor" + RESOLUTION: 12 + SHARE_BOX_FEATURE_EXTRACTOR: False + MASK_ON: True +DATASETS: + TRAIN: ("coco_2014_train", "coco_2014_valminusminival") + TEST: ("coco_2014_minival",) +SOLVER: + BASE_LR: 0.06 + WARMUP_FACTOR: 0.1 + WEIGHT_DECAY: 0.0001 + STEPS: (60000, 80000) + MAX_ITER: 90000 + IMS_PER_BATCH: 128 # for 8GPUs +# TEST: +# IMS_PER_BATCH: 8 +INPUT: + MIN_SIZE_TRAIN: (600, ) + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 600 + MAX_SIZE_TEST: 1000 + PIXEL_MEAN: [103.53, 116.28, 123.675] + PIXEL_STD: [57.375, 57.12, 58.395] diff --git a/configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml b/configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml index 91e0eba53..18c929711 100755 --- a/configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml +++ b/configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml @@ -16,7 +16,7 @@ MODEL: PRE_NMS_TOP_N_TRAIN: 6000 PRE_NMS_TOP_N_TEST: 6000 POST_NMS_TOP_N_TRAIN: 2000 - POST_NMS_TOP_N_TEST: 1000 + POST_NMS_TOP_N_TEST: 100 RPN_HEAD: FBNet.rpn_head ROI_HEADS: BATCH_SIZE_PER_IMAGE: 512 diff --git a/configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml b/configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml new file mode 100755 index 000000000..5bf030850 --- /dev/null +++ b/configs/e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml @@ -0,0 +1,52 @@ +MODEL: + META_ARCHITECTURE: "GeneralizedRCNN" + BACKBONE: + CONV_BODY: FBNet + FBNET: + ARCH: "xirb16d_dsmask" + BN_TYPE: "bn" + WIDTH_DIVISOR: 8 + DW_CONV_SKIP_BN: True + DW_CONV_SKIP_RELU: True + DET_HEAD_LAST_SCALE: 0.0 + RPN: + ANCHOR_SIZES: (32, 64, 128, 256, 512) + ANCHOR_STRIDE: (16, ) + BATCH_SIZE_PER_IMAGE: 256 + PRE_NMS_TOP_N_TRAIN: 6000 + PRE_NMS_TOP_N_TEST: 6000 + POST_NMS_TOP_N_TRAIN: 2000 + POST_NMS_TOP_N_TEST: 200 + 
RPN_HEAD: FBNet.rpn_head + ROI_HEADS: + BATCH_SIZE_PER_IMAGE: 256 + ROI_BOX_HEAD: + POOLER_RESOLUTION: 6 + FEATURE_EXTRACTOR: FBNet.roi_head + NUM_CLASSES: 81 + ROI_MASK_HEAD: + POOLER_RESOLUTION: 6 + FEATURE_EXTRACTOR: FBNet.roi_head_mask + PREDICTOR: "MaskRCNNConv1x1Predictor" + RESOLUTION: 12 + SHARE_BOX_FEATURE_EXTRACTOR: False + MASK_ON: True +DATASETS: + TRAIN: ("coco_2014_train", "coco_2014_valminusminival") + TEST: ("coco_2014_minival",) +SOLVER: + BASE_LR: 0.06 + WARMUP_FACTOR: 0.1 + WEIGHT_DECAY: 0.0001 + STEPS: (60000, 80000) + MAX_ITER: 90000 + IMS_PER_BATCH: 128 # for 8GPUs +# TEST: +# IMS_PER_BATCH: 8 +INPUT: + MIN_SIZE_TRAIN: (600, ) + MAX_SIZE_TRAIN: 1000 + MIN_SIZE_TEST: 600 + MAX_SIZE_TEST: 1000 + PIXEL_MEAN: [103.53, 116.28, 123.675] + PIXEL_STD: [57.375, 57.12, 58.395] diff --git a/maskrcnn_benchmark/engine/inference.py b/maskrcnn_benchmark/engine/inference.py index 1e0956aad..e125cb877 100644 --- a/maskrcnn_benchmark/engine/inference.py +++ b/maskrcnn_benchmark/engine/inference.py @@ -1,5 +1,4 @@ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -import datetime import logging import time import os @@ -11,17 +10,23 @@ from ..utils.comm import is_main_process, get_world_size from ..utils.comm import all_gather from ..utils.comm import synchronize +from ..utils.timer import Timer, get_time_str -def compute_on_dataset(model, data_loader, device): +def compute_on_dataset(model, data_loader, device, timer=None): model.eval() results_dict = {} cpu_device = torch.device("cpu") - for i, batch in enumerate(tqdm(data_loader)): + for _, batch in enumerate(tqdm(data_loader)): images, targets, image_ids = batch images = images.to(device) with torch.no_grad(): + if timer: + timer.tic() output = model(images) + if timer: + torch.cuda.synchronize() + timer.toc() output = [o.to(cpu_device) for o in output] results_dict.update( {img_id: result for img_id, result in zip(image_ids, output)} @@ -68,17 +73,27 @@ def inference( logger = logging.getLogger("maskrcnn_benchmark.inference") dataset = data_loader.dataset logger.info("Start evaluation on {} dataset({} images).".format(dataset_name, len(dataset))) - start_time = time.time() - predictions = compute_on_dataset(model, data_loader, device) + total_timer = Timer() + inference_timer = Timer() + total_timer.tic() + predictions = compute_on_dataset(model, data_loader, device, inference_timer) # wait for all processes to complete before measuring the time synchronize() - total_time = time.time() - start_time - total_time_str = str(datetime.timedelta(seconds=total_time)) + total_time = total_timer.toc() + total_time_str = get_time_str(total_time) logger.info( - "Total inference time: {} ({} s / img per device, on {} devices)".format( + "Total run time: {} ({} s / img per device, on {} devices)".format( total_time_str, total_time * num_devices / len(dataset), num_devices ) ) + total_infer_time = get_time_str(inference_timer.total_time) + logger.info( + "Model inference time: {} ({} s / img per device, on {} devices)".format( + total_infer_time, + inference_timer.total_time * num_devices / len(dataset), + num_devices, + ) + ) predictions = _accumulate_predictions_from_multiple_gpus(predictions) if not is_main_process(): diff --git a/maskrcnn_benchmark/modeling/backbone/fbnet.py b/maskrcnn_benchmark/modeling/backbone/fbnet.py index 3669597a6..0d8cf1522 100755 --- a/maskrcnn_benchmark/modeling/backbone/fbnet.py +++ b/maskrcnn_benchmark/modeling/backbone/fbnet.py @@ -199,13 +199,6 @@ def __init__( ("last", last) ])) - # 
output_blob = builder.add_final_pool( - # # model, output_blob, kernel_size=cfg.FAST_RCNN.ROI_XFORM_RESOLUTION) - # model, - # output_blob, - # kernel_size=int(cfg.FAST_RCNN.ROI_XFORM_RESOLUTION / stride_init), - # ) - self.out_channels = builder.last_depth def forward(self, x, proposals): diff --git a/maskrcnn_benchmark/modeling/backbone/fbnet_builder.py b/maskrcnn_benchmark/modeling/backbone/fbnet_builder.py index 473161756..112a04074 100755 --- a/maskrcnn_benchmark/modeling/backbone/fbnet_builder.py +++ b/maskrcnn_benchmark/modeling/backbone/fbnet_builder.py @@ -771,6 +771,9 @@ def add_last(self, stage_info): last_channel = int(self.last_depth * (-channel_scale)) last_channel = self._get_divisible_width(last_channel) + if last_channel == 0: + return nn.Sequential() + dim_in = self.last_depth ret = ConvBNRelu( dim_in, diff --git a/maskrcnn_benchmark/modeling/backbone/fbnet_modeldef.py b/maskrcnn_benchmark/modeling/backbone/fbnet_modeldef.py index de666808a..fb1c96b3a 100755 --- a/maskrcnn_benchmark/modeling/backbone/fbnet_modeldef.py +++ b/maskrcnn_benchmark/modeling/backbone/fbnet_modeldef.py @@ -47,7 +47,7 @@ def add_archs(archs): [[4, 160, 1, 1], [6, 160, 3, 1], [3, 80, 1, -2]], ], # [c, channel_scale] - "last": [1280, 0.0], + "last": [0, 0.0], "backbone": [0, 1, 2, 3], "rpn": [5], "bbox": [4], @@ -91,7 +91,7 @@ def add_archs(archs): [[6, 128, 3, 1]], ], # [c, channel_scale] - "last": [1280, 0.0], + "last": [0, 0.0], "backbone": [0, 1, 2, 3], "rpn": [6], "bbox": [4], @@ -127,9 +127,92 @@ def add_archs(archs): [[6, 160, 3, 1], [6, 320, 1, 1]], ], # [c, channel_scale] - "last": [1280, 0.0], + "last": [0, 0.0], "backbone": [0, 1, 2, 3], "bbox": [4], }, }, } + + +MODEL_ARCH_CHAM = { + "cham_v1a": { + "block_op_type": [ + # stage 0 + ["ir_k3"], + # stage 1 + ["ir_k7"] * 2, + # stage 2 + ["ir_k3"] * 5, + # stage 3 + ["ir_k5"] * 7 + ["ir_k3"] * 5, + # stage 4, bbox head + ["ir_k3"] * 5, + # stage 5, rpn + ["ir_k3"] * 3, + ], + "block_cfg": { + "first": [32, 2], + "stages": [ + # [t, c, n, s] + # stage 0 + [[1, 24, 1, 1]], + # stage 1 + [[4, 48, 2, 2]], + # stage 2 + [[7, 64, 5, 2]], + # stage 3 + [[12, 56, 7, 2], [8, 88, 5, 1]], + # stage 4, bbox head + [[7, 152, 4, 2], [10, 104, 1, 1]], + # stage 5, rpn head + [[8, 88, 3, 1]], + ], + # [c, channel_scale] + "last": [0, 0.0], + "backbone": [0, 1, 2, 3], + "rpn": [5], + "bbox": [4], + }, + }, + "cham_v2": { + "block_op_type": [ + # stage 0 + ["ir_k3"], + # stage 1 + ["ir_k5"] * 4, + # stage 2 + ["ir_k7"] * 6, + # stage 3 + ["ir_k5"] * 3 + ["ir_k3"] * 6, + # stage 4, bbox head + ["ir_k3"] * 7, + # stage 5, rpn + ["ir_k3"] * 1, + ], + "block_cfg": { + "first": [32, 2], + "stages": [ + # [t, c, n, s] + # stage 0 + [[1, 24, 1, 1]], + # stage 1 + [[8, 32, 4, 2]], + # stage 2 + [[5, 48, 6, 2]], + # stage 3 + [[9, 56, 3, 2], [6, 56, 6, 1]], + # stage 4, bbox head + [[2, 160, 6, 2], [6, 112, 1, 1]], + # stage 5, rpn head + [[6, 56, 1, 1]], + ], + # [c, channel_scale] + "last": [0, 0.0], + "backbone": [0, 1, 2, 3], + "rpn": [5], + "bbox": [4], + }, + }, +} +add_archs(MODEL_ARCH_CHAM) diff --git a/maskrcnn_benchmark/utils/timer.py b/maskrcnn_benchmark/utils/timer.py new file mode 100755 index 000000000..935af1a30 --- /dev/null +++ b/maskrcnn_benchmark/utils/timer.py @@ -0,0 +1,46 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
+ + +import time +import datetime + + +class Timer(object): + def __init__(self): + self.reset() + + @property + def average_time(self): + return self.total_time / self.calls if self.calls > 0 else 0.0 + + def tic(self): + # using time.time instead of time.clock because time time.clock + # does not normalize for multithreading + self.start_time = time.time() + + def toc(self, average=True): + self.add(time.time() - self.start_time) + if average: + return self.average_time + else: + return self.diff + + def add(self, time_diff): + self.diff = time_diff + self.total_time += self.diff + self.calls += 1 + + def reset(self): + self.total_time = 0.0 + self.calls = 0 + self.start_time = 0.0 + self.diff = 0.0 + + def avg_time_str(self): + time_str = str(datetime.timedelta(seconds=self.average_time)) + return time_str + + +def get_time_str(time_diff): + time_str = str(datetime.timedelta(seconds=time_diff)) + return time_str diff --git a/tests/test_detectors.py b/tests/test_detectors.py new file mode 100644 index 000000000..5f9f7bfa2 --- /dev/null +++ b/tests/test_detectors.py @@ -0,0 +1,143 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. + +import unittest +import glob +import os +import copy +import torch +from maskrcnn_benchmark.modeling.detector import build_detection_model +from maskrcnn_benchmark.structures.image_list import to_image_list +import utils + + +CONFIG_FILES = [ + # bbox + "e2e_faster_rcnn_R_50_C4_1x.yaml", + "e2e_faster_rcnn_R_50_FPN_1x.yaml", + "e2e_faster_rcnn_fbnet.yaml", + + # mask + "e2e_mask_rcnn_R_50_C4_1x.yaml", + "e2e_mask_rcnn_R_50_FPN_1x.yaml", + "e2e_mask_rcnn_fbnet.yaml", + + # keypoints + # TODO: fail to run for random model due to empty head input + # "e2e_keypoint_rcnn_R_50_FPN_1x.yaml", + + # gn + "gn_baselines/e2e_faster_rcnn_R_50_FPN_1x_gn.yaml", + # TODO: fail to run for random model due to empty head input + # "gn_baselines/e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml", + + # retinanet + "retinanet/retinanet_R-50-FPN_1x.yaml", + + # rpn only + "rpn_R_50_C4_1x.yaml", + "rpn_R_50_FPN_1x.yaml", +] + +EXCLUDED_FOLDERS = [ + "caffe2", + "quick_schedules", + "pascal_voc", + "cityscapes", +] + + +TEST_CUDA = torch.cuda.is_available() + + +def get_config_files(file_list, exclude_folders): + cfg_root_path = utils.get_config_root_path() + if file_list is not None: + files = [os.path.join(cfg_root_path, x) for x in file_list] + else: + files = glob.glob( + os.path.join(cfg_root_path, "./**/*.yaml"), recursive=True) + + def _contains(path, exclude_dirs): + return any(x in path for x in exclude_dirs) + + if exclude_folders is not None: + files = [x for x in files if not _contains(x, exclude_folders)] + + return files + + +def create_model(cfg, device): + cfg = copy.deepcopy(cfg) + cfg.freeze() + model = build_detection_model(cfg) + model = model.to(device) + return model + + +def create_random_input(cfg, device): + ret = [] + for x in cfg.INPUT.MIN_SIZE_TRAIN: + ret.append(torch.rand(3, x, int(x * 1.2))) + ret = to_image_list(ret, cfg.DATALOADER.SIZE_DIVISIBILITY) + ret = ret.to(device) + return ret + + +def _test_build_detectors(self, device): + ''' Make sure models build ''' + + cfg_files = get_config_files(None, EXCLUDED_FOLDERS) + self.assertGreater(len(cfg_files), 0) + + for cfg_file in cfg_files: + with self.subTest(cfg_file=cfg_file): + print('Testing {}...'.format(cfg_file)) + cfg = utils.load_config_from_file(cfg_file) + create_model(cfg, device) + + +def _test_run_selected_detectors(self, cfg_files, device): + ''' Make sure models build 
and run ''' + self.assertGreater(len(cfg_files), 0) + + for cfg_file in cfg_files: + with self.subTest(cfg_file=cfg_file): + print('Testing {}...'.format(cfg_file)) + cfg = utils.load_config_from_file(cfg_file) + cfg.MODEL.RPN.POST_NMS_TOP_N_TEST = 10 + cfg.MODEL.RPN.FPN_POST_NMS_TOP_N_TEST = 10 + model = create_model(cfg, device) + inputs = create_random_input(cfg, device) + model.eval() + output = model(inputs) + self.assertEqual(len(output), len(inputs.image_sizes)) + + +class TestDetectors(unittest.TestCase): + def test_build_detectors(self): + ''' Make sure models build ''' + _test_build_detectors(self, "cpu") + + @unittest.skipIf(not TEST_CUDA, "no CUDA detected") + def test_build_detectors_cuda(self): + ''' Make sure models build on gpu''' + _test_build_detectors(self, "cuda") + + def test_run_selected_detectors(self): + ''' Make sure models build and run ''' + # run on selected models + cfg_files = get_config_files(CONFIG_FILES, None) + # cfg_files = get_config_files(None, EXCLUDED_FOLDERS) + _test_run_selected_detectors(self, cfg_files, "cpu") + + @unittest.skipIf(not TEST_CUDA, "no CUDA detected") + def test_run_selected_detectors_cuda(self): + ''' Make sure models build and run on cuda ''' + # run on selected models + cfg_files = get_config_files(CONFIG_FILES, None) + # cfg_files = get_config_files(None, EXCLUDED_FOLDERS) + _test_run_selected_detectors(self, cfg_files, "cuda") + + +if __name__ == "__main__": + unittest.main() From 90080e60cc4a9252d019f475af044c9d4119b09b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20Sch=C3=A4fer?= Date: Sat, 9 Mar 2019 18:23:44 +0100 Subject: [PATCH 05/17] merge duplicate keys in yaml files (#545) --- .../caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml | 7 +++---- .../e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml | 7 +++---- .../e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml | 7 +++---- configs/retinanet/retinanet_X_101_32x8d_FPN_1x.yaml | 7 +++---- configs/rpn_X_101_32x8d_FPN_1x.yaml | 7 +++---- 5 files changed, 15 insertions(+), 20 deletions(-) diff --git a/configs/caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml b/configs/caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml index 7aeab5c1d..6682c38a5 100644 --- a/configs/caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml +++ b/configs/caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml @@ -5,6 +5,9 @@ MODEL: CONV_BODY: "R-101-FPN" RESNETS: BACKBONE_OUT_CHANNELS: 256 + STRIDE_IN_1X1: False + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 RPN: USE_FPN: True ANCHOR_STRIDE: (4, 8, 16, 32, 64) @@ -20,10 +23,6 @@ MODEL: POOLER_SAMPLING_RATIO: 2 FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" PREDICTOR: "FPNPredictor" - RESNETS: - STRIDE_IN_1X1: False - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 DATASETS: TEST: ("coco_2014_minival",) DATALOADER: diff --git a/configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml b/configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml index bb74466de..0e6af5671 100644 --- a/configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml +++ b/configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml @@ -5,6 +5,9 @@ MODEL: CONV_BODY: "R-101-FPN" RESNETS: BACKBONE_OUT_CHANNELS: 256 + STRIDE_IN_1X1: False + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 RPN: USE_FPN: True ANCHOR_STRIDE: (4, 8, 16, 32, 64) @@ -21,10 +24,6 @@ MODEL: POOLER_SAMPLING_RATIO: 2 FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" PREDICTOR: "FPNPredictor" - RESNETS: - STRIDE_IN_1X1: False - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 DATASETS: TRAIN: ("coco_2014_minival",) TEST: 
("coco_2014_minival",) diff --git a/configs/quick_schedules/e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml b/configs/quick_schedules/e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml index 2ff9a8892..057dfc6b2 100644 --- a/configs/quick_schedules/e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml +++ b/configs/quick_schedules/e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml @@ -5,6 +5,9 @@ MODEL: CONV_BODY: "R-101-FPN" RESNETS: BACKBONE_OUT_CHANNELS: 256 + STRIDE_IN_1X1: False + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 RPN: USE_FPN: True ANCHOR_STRIDE: (4, 8, 16, 32, 64) @@ -29,10 +32,6 @@ MODEL: POOLER_SAMPLING_RATIO: 2 RESOLUTION: 28 SHARE_BOX_FEATURE_EXTRACTOR: False - RESNETS: - STRIDE_IN_1X1: False - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 MASK_ON: True DATASETS: TRAIN: ("coco_2014_minival",) diff --git a/configs/retinanet/retinanet_X_101_32x8d_FPN_1x.yaml b/configs/retinanet/retinanet_X_101_32x8d_FPN_1x.yaml index ea8d9f1bd..a03e3d664 100644 --- a/configs/retinanet/retinanet_X_101_32x8d_FPN_1x.yaml +++ b/configs/retinanet/retinanet_X_101_32x8d_FPN_1x.yaml @@ -7,6 +7,9 @@ MODEL: CONV_BODY: "R-101-FPN-RETINANET" RESNETS: BACKBONE_OUT_CHANNELS: 256 + STRIDE_IN_1X1: False + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 RPN: USE_FPN: True FG_IOU_THRESHOLD: 0.5 @@ -25,10 +28,6 @@ MODEL: POOLER_SAMPLING_RATIO: 2 FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" PREDICTOR: "FPNPredictor" - RESNETS: - STRIDE_IN_1X1: False - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 RETINANET: SCALES_PER_OCTAVE: 3 STRADDLE_THRESH: -1 diff --git a/configs/rpn_X_101_32x8d_FPN_1x.yaml b/configs/rpn_X_101_32x8d_FPN_1x.yaml index 890594196..931b0452e 100644 --- a/configs/rpn_X_101_32x8d_FPN_1x.yaml +++ b/configs/rpn_X_101_32x8d_FPN_1x.yaml @@ -6,16 +6,15 @@ MODEL: CONV_BODY: "R-101-FPN" RESNETS: BACKBONE_OUT_CHANNELS: 256 + STRIDE_IN_1X1: False + NUM_GROUPS: 32 + WIDTH_PER_GROUP: 8 RPN: USE_FPN: True ANCHOR_STRIDE: (4, 8, 16, 32, 64) PRE_NMS_TOP_N_TEST: 1000 POST_NMS_TOP_N_TEST: 2000 FPN_POST_NMS_TOP_N_TEST: 2000 - RESNETS: - STRIDE_IN_1X1: False - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 DATASETS: TRAIN: ("coco_2014_train", "coco_2014_valminusminival") TEST: ("coco_2014_minival",) From 8df030c69c92ef563e83d93bcfe1c9b7f92c8f5c Mon Sep 17 00:00:00 2001 From: vishwakftw Date: Mon, 11 Mar 2019 21:26:31 +0530 Subject: [PATCH 06/17] Fix dispatch breakage --- maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp | 2 +- maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp | 2 +- maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu | 4 ++-- maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu | 4 ++-- maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp b/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp index d35aedf27..cd9fde2ae 100644 --- a/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp +++ b/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp @@ -239,7 +239,7 @@ at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, return output; } - AT_DISPATCH_FLOATING_TYPES(input.type(), "ROIAlign_forward", [&] { + AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "ROIAlign_forward", [&] { ROIAlignForward_cpu_kernel( output_size, input.data(), diff --git a/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp b/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp index 1153dea04..639ca472e 100644 --- a/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp +++ b/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp @@ -68,7 +68,7 @@ at::Tensor nms_cpu(const at::Tensor& dets, const at::Tensor& scores, const float threshold) { at::Tensor result; - AT_DISPATCH_FLOATING_TYPES(dets.type(), 
"nms", [&] { + AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] { result = nms_cpu_kernel(dets, scores, threshold); }); return result; diff --git a/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu b/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu index 1142fb375..170771aa8 100644 --- a/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu +++ b/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu @@ -280,7 +280,7 @@ at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, return output; } - AT_DISPATCH_FLOATING_TYPES(input.type(), "ROIAlign_forward", [&] { + AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "ROIAlign_forward", [&] { RoIAlignForward<<>>( output_size, input.contiguous().data(), @@ -326,7 +326,7 @@ at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, return grad_input; } - AT_DISPATCH_FLOATING_TYPES(grad.type(), "ROIAlign_backward", [&] { + AT_DISPATCH_FLOATING_TYPES(grad.scalar_type(), "ROIAlign_backward", [&] { RoIAlignBackwardFeature<<>>( grad.numel(), grad.contiguous().data(), diff --git a/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu b/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu index 8f072ffc2..cef3beaa4 100644 --- a/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu +++ b/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu @@ -134,7 +134,7 @@ std::tuple ROIPool_forward_cuda(const at::Tensor& input, return std::make_tuple(output, argmax); } - AT_DISPATCH_FLOATING_TYPES(input.type(), "ROIPool_forward", [&] { + AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "ROIPool_forward", [&] { RoIPoolFForward<<>>( output_size, input.contiguous().data(), @@ -182,7 +182,7 @@ at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, return grad_input; } - AT_DISPATCH_FLOATING_TYPES(grad.type(), "ROIPool_backward", [&] { + AT_DISPATCH_FLOATING_TYPES(grad.scalar_type(), "ROIPool_backward", [&] { RoIPoolFBackward<<>>( grad.numel(), grad.contiguous().data(), diff --git a/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu b/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu index 7d40767bb..cd9b4c96b 100644 --- a/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu +++ b/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu @@ -125,7 +125,7 @@ at::Tensor SigmoidFocalLoss_forward_cuda( return losses; } - AT_DISPATCH_FLOATING_TYPES(logits.type(), "SigmoidFocalLoss_forward", [&] { + AT_DISPATCH_FLOATING_TYPES(logits.scalar_type(), "SigmoidFocalLoss_forward", [&] { SigmoidFocalLossForward<<>>( losses_size, logits.contiguous().data(), @@ -169,7 +169,7 @@ at::Tensor SigmoidFocalLoss_backward_cuda( return d_logits; } - AT_DISPATCH_FLOATING_TYPES(logits.type(), "SigmoidFocalLoss_backward", [&] { + AT_DISPATCH_FLOATING_TYPES(logits.scalar_type(), "SigmoidFocalLoss_backward", [&] { SigmoidFocalLossBackward<<>>( d_logits_size, logits.contiguous().data(), From 558d7afad13db40bf7062f9c9b09bb7acf755a82 Mon Sep 17 00:00:00 2001 From: Francisco Massa Date: Mon, 11 Mar 2019 17:58:10 +0100 Subject: [PATCH 07/17] Fix installation instructions and pin PyTorch to a nightly (#557) --- INSTALL.md | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index caa1f3b78..8365be8f9 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -1,7 +1,7 @@ ## Installation ### Requirements: -- PyTorch 1.0 from a nightly release. Installation instructions can be found in https://pytorch.org/get-started/locally/ +- PyTorch 1.0 from a nightly release. It **will not** work with 1.0 nor 1.0.1. 
Installation instructions can be found in https://pytorch.org/get-started/locally/ - torchvision from master - cocoapi - yacs @@ -23,8 +23,14 @@ conda activate maskrcnn_benchmark # this installs the right pip and dependencies for the fresh python conda install ipython -export INSTALL_DIR=$PWD +# maskrcnn_benchmark and coco api dependencies +pip install ninja yacs cython matplotlib tqdm + +# follow PyTorch installation in https://pytorch.org/get-started/locally/ +# we give the instructions for CUDA 9.0 +conda install -c pytorch pytorch-nightly torchvision cudatoolkit=9.0 +export INSTALL_DIR=$PWD # install pycocotools cd $INSTALL_DIR @@ -37,13 +43,6 @@ cd $INSTALL_DIR git clone https://github.com/facebookresearch/maskrcnn-benchmark.git cd maskrcnn-benchmark -# maskrcnn_benchmark and coco api dependencies -pip install -r requirements.txt - -# follow PyTorch installation in https://pytorch.org/get-started/locally/ -# we give the instructions for CUDA 9.0 -conda install -c pytorch pytorch torchvision cudatoolkit=9.0 - # the following will install the lib with # symbolic links, so that you can modify # the files if you want and won't need to From b3d1de0088ad84b7a1cdee62c08418c7b9095acc Mon Sep 17 00:00:00 2001 From: Csaba Botos Date: Tue, 12 Mar 2019 18:08:28 +0100 Subject: [PATCH 08/17] use internal interpolate implementation (#559) --- maskrcnn_benchmark/modeling/roi_heads/mask_head/inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/maskrcnn_benchmark/modeling/roi_heads/mask_head/inference.py b/maskrcnn_benchmark/modeling/roi_heads/mask_head/inference.py index e89d513c9..cd033e06c 100644 --- a/maskrcnn_benchmark/modeling/roi_heads/mask_head/inference.py +++ b/maskrcnn_benchmark/modeling/roi_heads/mask_head/inference.py @@ -2,7 +2,7 @@ import numpy as np import torch from torch import nn -import torch.nn.functional as F +from maskrcnn_benchmark.layers.misc import interpolate from maskrcnn_benchmark.structures.bounding_box import BoxList @@ -132,7 +132,7 @@ def paste_mask_in_image(mask, box, im_h, im_w, thresh=0.5, padding=1): # Resize mask mask = mask.to(torch.float32) - mask = F.interpolate(mask, size=(h, w), mode='bilinear', align_corners=False) + mask = interpolate(mask, size=(h, w), mode='bilinear', align_corners=False) mask = mask[0][0] if thresh >= 0: From 9063850dc3069dce9d6a8ce9f65f8449b1cd3be7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20Sch=C3=A4fer?= Date: Mon, 25 Mar 2019 11:19:55 +0100 Subject: [PATCH 09/17] README - add your own dataset (#569) --- README.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 01a81d812..780722ed8 100644 --- a/README.md +++ b/README.md @@ -198,11 +198,21 @@ That's it. You can also add extra fields to the boxlist, such as segmentation ma For a full example of how the `COCODataset` is implemented, check [`maskrcnn_benchmark/data/datasets/coco.py`](maskrcnn_benchmark/data/datasets/coco.py). -### Note: +Once you have created your dataset, it needs to be added in a couple of places: +- [`maskrcnn_benchmark/data/datasets/__init__.py`](maskrcnn_benchmark/data/datasets/__init__.py): add it to `__all__` +- [`maskrcnn_benchmark/config/paths_catalog.py`](maskrcnn_benchmark/config/paths_catalog.py): `DatasetCatalog.DATASETS` and corresponding `if` clause in `DatasetCatalog.get()` + +### Testing While the aforementioned example should work for training, we leverage the cocoApi for computing the accuracies during testing. 
Thus, test datasets should currently follow the cocoApi for now. +To enable your dataset for testing, add a corresponding if statement in [`maskrcnn_benchmark/data/datasets/evaluation/__init__.py`](maskrcnn_benchmark/data/datasets/evaluation/__init__.py): +```python +if isinstance(dataset, datasets.MyDataset): + return coco_evaluation(**args) +``` + ## Finetuning from Detectron weights on custom datasets Create a script `tools/trim_detectron_model.py` like [here](https://gist.github.com/wangg12/aea194aa6ab6a4de088f14ee193fd968). You can decide which keys to be removed and which keys to be kept by modifying the script. From f0318794779581419b0162e24e6923ac3711cfd6 Mon Sep 17 00:00:00 2001 From: kaiJIN Date: Tue, 26 Mar 2019 18:35:58 +0800 Subject: [PATCH 10/17] Support for running on arbitrary CUDA device. (#537) * support for any one cuda device * Revert "support for any one cuda device" This reverts commit 0197e4e2ef18ec41cc155f3ae2a0face5b77e1e9. * support runnning for anyone cuda device * using safe CUDAGuard rather than intrinsic CUDASetDevice * supplement a header dependency (test passed) * Support for arbitrary GPU device. * Support for arbitrary GPU device. * add docs for two method to control devices --- README.md | 21 +++++++++++++++++++ maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu | 4 ++++ maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu | 4 ++++ .../csrc/cuda/SigmoidFocalLoss_cuda.cu | 7 ++++++- maskrcnn_benchmark/csrc/cuda/nms.cu | 3 +++ 5 files changed, 38 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 780722ed8..c5acea6fa 100644 --- a/README.md +++ b/README.md @@ -68,6 +68,27 @@ image = ... predictions = coco_demo.run_on_opencv_image(image) ``` +### Use it on an arbitrary GPU device +For some cases, while multi-GPU devices are installed in a machine, a possible situation is that +we only have accesse to a specified GPU device (e.g. CUDA:1 or CUDA:2) for inference, testing or training. +Here, the repository currently supports two methods to control devices. + +#### 1. using CUDA_VISIBLE_DEVICES environment variable (Recommend) +Here is an example for Mask R-CNN R-50 FPN quick on the second device (CUDA:1): +```bash +export CUDA_VISIBLE_DEVICES=1 +python tools/train_net.py --config-file=configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml +``` +Now, the session will be totally loaded on the second GPU device (CUDA:1). + +#### 2. using MODEL.DEVICE flag +In addition, the program could run on a sepcific GPU device by setting `MODEL.DEVICE` flag. +```bash +python tools/train_net.py --config-file=configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml MODEL.DEVICE cuda:1 +``` +Where, we add a `MODEL.DEVICE cuda:1` flag to configure the target device. +*Pay attention, there is still a small part of memory stored in `cuda:0` for some reasons.* + ## Perform training on COCO dataset For the following examples to work, you need to first install `maskrcnn_benchmark`. diff --git a/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu b/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu index 170771aa8..29e7ac6ed 100644 --- a/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu +++ b/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu @@ -1,6 +1,7 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
#include #include +#include #include #include @@ -263,6 +264,8 @@ at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor"); AT_ASSERTM(rois.type().is_cuda(), "rois must be a CUDA tensor"); + at::cuda::CUDAGuard device_guard(input.device()); + auto num_rois = rois.size(0); auto channels = input.size(1); auto height = input.size(2); @@ -311,6 +314,7 @@ at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, const int sampling_ratio) { AT_ASSERTM(grad.type().is_cuda(), "grad must be a CUDA tensor"); AT_ASSERTM(rois.type().is_cuda(), "rois must be a CUDA tensor"); + at::cuda::CUDAGuard device_guard(grad.device()); auto num_rois = rois.size(0); auto grad_input = at::zeros({batch_size, channels, height, width}, grad.options()); diff --git a/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu b/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu index cef3beaa4..f79bb71dc 100644 --- a/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu +++ b/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu @@ -1,6 +1,7 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. #include #include +#include #include #include @@ -115,6 +116,8 @@ std::tuple ROIPool_forward_cuda(const at::Tensor& input, AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor"); AT_ASSERTM(rois.type().is_cuda(), "rois must be a CUDA tensor"); + at::cuda::CUDAGuard device_guard(input.device()); + auto num_rois = rois.size(0); auto channels = input.size(1); auto height = input.size(2); @@ -167,6 +170,7 @@ at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, AT_ASSERTM(grad.type().is_cuda(), "grad must be a CUDA tensor"); AT_ASSERTM(rois.type().is_cuda(), "rois must be a CUDA tensor"); // TODO add more checks + at::cuda::CUDAGuard device_guard(grad.device()); auto num_rois = rois.size(0); auto grad_input = at::zeros({batch_size, channels, height, width}, grad.options()); diff --git a/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu b/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu index cd9b4c96b..e25424e00 100644 --- a/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu +++ b/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu @@ -4,6 +4,7 @@ // cyfu@cs.unc.edu #include #include +#include #include #include @@ -111,6 +112,8 @@ at::Tensor SigmoidFocalLoss_forward_cuda( AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor"); AT_ASSERTM(logits.dim() == 2, "logits should be NxClass"); + at::cuda::CUDAGuard device_guard(logits.device()); + const int num_samples = logits.size(0); auto losses = at::empty({num_samples, logits.size(1)}, logits.options()); @@ -156,7 +159,9 @@ at::Tensor SigmoidFocalLoss_backward_cuda( const int num_samples = logits.size(0); AT_ASSERTM(logits.size(1) == num_classes, "logits.size(1) should be num_classes"); - + + at::cuda::CUDAGuard device_guard(logits.device()); + auto d_logits = at::zeros({num_samples, num_classes}, logits.options()); auto d_logits_size = num_samples * logits.size(1); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); diff --git a/maskrcnn_benchmark/csrc/cuda/nms.cu b/maskrcnn_benchmark/csrc/cuda/nms.cu index 833d8523a..7bb0e50a1 100644 --- a/maskrcnn_benchmark/csrc/cuda/nms.cu +++ b/maskrcnn_benchmark/csrc/cuda/nms.cu @@ -1,6 +1,7 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
#include #include +#include #include #include @@ -70,6 +71,8 @@ __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { using scalar_t = float; AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor"); + at::cuda::CUDAGuard device_guard(boxes.device()); + auto scores = boxes.select(1, 4); auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); auto boxes_sorted = boxes.index_select(0, order_t); From 4a7dcc4da980b1f6681a1dcefdc0938b1adbe623 Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Tue, 26 Mar 2019 11:36:55 +0100 Subject: [PATCH 11/17] Rollback dispatch patch (#603) * Merge branch 'master' of /home/braincreator/projects/maskrcnn-benchmark with conflicts. * rolls back the breaking AT dispatch changes (#555) * revert accidental docker changes * revert accidental docker changes (2) --- maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp | 2 +- maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp | 2 +- maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu | 4 ++-- maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu | 4 ++-- maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp b/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp index cd9fde2ae..d35aedf27 100644 --- a/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp +++ b/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp @@ -239,7 +239,7 @@ at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, return output; } - AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "ROIAlign_forward", [&] { + AT_DISPATCH_FLOATING_TYPES(input.type(), "ROIAlign_forward", [&] { ROIAlignForward_cpu_kernel( output_size, input.data(), diff --git a/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp b/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp index 639ca472e..1153dea04 100644 --- a/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp +++ b/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp @@ -68,7 +68,7 @@ at::Tensor nms_cpu(const at::Tensor& dets, const at::Tensor& scores, const float threshold) { at::Tensor result; - AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] { + AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { result = nms_cpu_kernel(dets, scores, threshold); }); return result; diff --git a/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu b/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu index 29e7ac6ed..2ff36adee 100644 --- a/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu +++ b/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu @@ -283,7 +283,7 @@ at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, return output; } - AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "ROIAlign_forward", [&] { + AT_DISPATCH_FLOATING_TYPES(input.type(), "ROIAlign_forward", [&] { RoIAlignForward<<>>( output_size, input.contiguous().data(), @@ -330,7 +330,7 @@ at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, return grad_input; } - AT_DISPATCH_FLOATING_TYPES(grad.scalar_type(), "ROIAlign_backward", [&] { + AT_DISPATCH_FLOATING_TYPES(grad.type(), "ROIAlign_backward", [&] { RoIAlignBackwardFeature<<>>( grad.numel(), grad.contiguous().data(), diff --git a/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu b/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu index f79bb71dc..0b2b1758c 100644 --- a/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu +++ b/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu @@ -137,7 +137,7 @@ std::tuple ROIPool_forward_cuda(const at::Tensor& input, return std::make_tuple(output, argmax); } - 
AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "ROIPool_forward", [&] { + AT_DISPATCH_FLOATING_TYPES(input.type(), "ROIPool_forward", [&] { RoIPoolFForward<<>>( output_size, input.contiguous().data(), @@ -186,7 +186,7 @@ at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, return grad_input; } - AT_DISPATCH_FLOATING_TYPES(grad.scalar_type(), "ROIPool_backward", [&] { + AT_DISPATCH_FLOATING_TYPES(grad.type(), "ROIPool_backward", [&] { RoIPoolFBackward<<>>( grad.numel(), grad.contiguous().data(), diff --git a/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu b/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu index e25424e00..e0e7d3bff 100644 --- a/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu +++ b/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu @@ -128,7 +128,7 @@ at::Tensor SigmoidFocalLoss_forward_cuda( return losses; } - AT_DISPATCH_FLOATING_TYPES(logits.scalar_type(), "SigmoidFocalLoss_forward", [&] { + AT_DISPATCH_FLOATING_TYPES(logits.type(), "SigmoidFocalLoss_forward", [&] { SigmoidFocalLossForward<<>>( losses_size, logits.contiguous().data(), @@ -174,7 +174,7 @@ at::Tensor SigmoidFocalLoss_backward_cuda( return d_logits; } - AT_DISPATCH_FLOATING_TYPES(logits.scalar_type(), "SigmoidFocalLoss_backward", [&] { + AT_DISPATCH_FLOATING_TYPES(logits.type(), "SigmoidFocalLoss_backward", [&] { SigmoidFocalLossBackward<<>>( d_logits_size, logits.contiguous().data(), From bd39d2c1019200a84a0799976e96cf96fdb0d573 Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Tue, 26 Mar 2019 13:06:55 +0100 Subject: [PATCH 12/17] Fixes conda CUDA version in Dockerfile (#604) * fixes to dockerfile * replaces local installation by git clone --- docker/Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 39b508258..58b924cf4 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -31,8 +31,9 @@ ENV CONDA_AUTO_UPDATE_CONDA=false RUN conda install -y ipython RUN pip install ninja yacs cython matplotlib opencv-python -# Install PyTorch 1.0 Nightly and OpenCV -RUN conda install -y pytorch-nightly -c pytorch \ +# Install PyTorch 1.0 Nightly +ARG CUDA +RUN echo conda install pytorch-nightly cudatoolkit=${CUDA} -c pytorch \ && conda clean -ya # Install TorchVision master From 05feadff540c0d43e6752db0513d21e41038dbdb Mon Sep 17 00:00:00 2001 From: Francisco Massa Date: Tue, 26 Mar 2019 18:33:11 +0100 Subject: [PATCH 13/17] Revert "Support for running on arbitrary CUDA device. (#537)" (#608) This reverts commit f0318794779581419b0162e24e6923ac3711cfd6. --- README.md | 21 ------------------- maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu | 4 ---- maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu | 4 ---- .../csrc/cuda/SigmoidFocalLoss_cuda.cu | 7 +------ maskrcnn_benchmark/csrc/cuda/nms.cu | 3 --- 5 files changed, 1 insertion(+), 38 deletions(-) diff --git a/README.md b/README.md index c5acea6fa..780722ed8 100644 --- a/README.md +++ b/README.md @@ -68,27 +68,6 @@ image = ... predictions = coco_demo.run_on_opencv_image(image) ``` -### Use it on an arbitrary GPU device -For some cases, while multi-GPU devices are installed in a machine, a possible situation is that -we only have accesse to a specified GPU device (e.g. CUDA:1 or CUDA:2) for inference, testing or training. -Here, the repository currently supports two methods to control devices. - -#### 1. 
using CUDA_VISIBLE_DEVICES environment variable (Recommend) -Here is an example for Mask R-CNN R-50 FPN quick on the second device (CUDA:1): -```bash -export CUDA_VISIBLE_DEVICES=1 -python tools/train_net.py --config-file=configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml -``` -Now, the session will be totally loaded on the second GPU device (CUDA:1). - -#### 2. using MODEL.DEVICE flag -In addition, the program could run on a sepcific GPU device by setting `MODEL.DEVICE` flag. -```bash -python tools/train_net.py --config-file=configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml MODEL.DEVICE cuda:1 -``` -Where, we add a `MODEL.DEVICE cuda:1` flag to configure the target device. -*Pay attention, there is still a small part of memory stored in `cuda:0` for some reasons.* - ## Perform training on COCO dataset For the following examples to work, you need to first install `maskrcnn_benchmark`. diff --git a/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu b/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu index 2ff36adee..1142fb375 100644 --- a/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu +++ b/maskrcnn_benchmark/csrc/cuda/ROIAlign_cuda.cu @@ -1,7 +1,6 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. #include #include -#include #include #include @@ -264,8 +263,6 @@ at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor"); AT_ASSERTM(rois.type().is_cuda(), "rois must be a CUDA tensor"); - at::cuda::CUDAGuard device_guard(input.device()); - auto num_rois = rois.size(0); auto channels = input.size(1); auto height = input.size(2); @@ -314,7 +311,6 @@ at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, const int sampling_ratio) { AT_ASSERTM(grad.type().is_cuda(), "grad must be a CUDA tensor"); AT_ASSERTM(rois.type().is_cuda(), "rois must be a CUDA tensor"); - at::cuda::CUDAGuard device_guard(grad.device()); auto num_rois = rois.size(0); auto grad_input = at::zeros({batch_size, channels, height, width}, grad.options()); diff --git a/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu b/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu index 0b2b1758c..8f072ffc2 100644 --- a/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu +++ b/maskrcnn_benchmark/csrc/cuda/ROIPool_cuda.cu @@ -1,7 +1,6 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
#include #include -#include #include #include @@ -116,8 +115,6 @@ std::tuple ROIPool_forward_cuda(const at::Tensor& input, AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor"); AT_ASSERTM(rois.type().is_cuda(), "rois must be a CUDA tensor"); - at::cuda::CUDAGuard device_guard(input.device()); - auto num_rois = rois.size(0); auto channels = input.size(1); auto height = input.size(2); @@ -170,7 +167,6 @@ at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, AT_ASSERTM(grad.type().is_cuda(), "grad must be a CUDA tensor"); AT_ASSERTM(rois.type().is_cuda(), "rois must be a CUDA tensor"); // TODO add more checks - at::cuda::CUDAGuard device_guard(grad.device()); auto num_rois = rois.size(0); auto grad_input = at::zeros({batch_size, channels, height, width}, grad.options()); diff --git a/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu b/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu index e0e7d3bff..7d40767bb 100644 --- a/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu +++ b/maskrcnn_benchmark/csrc/cuda/SigmoidFocalLoss_cuda.cu @@ -4,7 +4,6 @@ // cyfu@cs.unc.edu #include #include -#include #include #include @@ -112,8 +111,6 @@ at::Tensor SigmoidFocalLoss_forward_cuda( AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor"); AT_ASSERTM(logits.dim() == 2, "logits should be NxClass"); - at::cuda::CUDAGuard device_guard(logits.device()); - const int num_samples = logits.size(0); auto losses = at::empty({num_samples, logits.size(1)}, logits.options()); @@ -159,9 +156,7 @@ at::Tensor SigmoidFocalLoss_backward_cuda( const int num_samples = logits.size(0); AT_ASSERTM(logits.size(1) == num_classes, "logits.size(1) should be num_classes"); - - at::cuda::CUDAGuard device_guard(logits.device()); - + auto d_logits = at::zeros({num_samples, num_classes}, logits.options()); auto d_logits_size = num_samples * logits.size(1); cudaStream_t stream = at::cuda::getCurrentCUDAStream(); diff --git a/maskrcnn_benchmark/csrc/cuda/nms.cu b/maskrcnn_benchmark/csrc/cuda/nms.cu index 7bb0e50a1..833d8523a 100644 --- a/maskrcnn_benchmark/csrc/cuda/nms.cu +++ b/maskrcnn_benchmark/csrc/cuda/nms.cu @@ -1,7 +1,6 @@ // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
#include #include -#include #include #include @@ -71,8 +70,6 @@ __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { using scalar_t = float; AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor"); - at::cuda::CUDAGuard device_guard(boxes.device()); - auto scores = boxes.select(1, 4); auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); auto boxes_sorted = boxes.index_select(0, order_t); From 90c226cf10e098263d1df28bda054a5f22513b4f Mon Sep 17 00:00:00 2001 From: Ouail Date: Sun, 31 Mar 2019 15:58:50 +0200 Subject: [PATCH 14/17] add the option to use a `FORCE_CUDA` to force cuda installation on docker (#612) * add a FORCE_CUDA flag Following discussion [here](https://github.com/facebookresearch/maskrcnn-benchmark/issues/167), this seemed the best solution * Update Dockerfile * Update setup.py * add FORCE_CUDA as an ARG * modified: docker/Dockerfile modified: setup.py * small fix to readme of demo * remove test print * keep ARG_CUDA * remove env value and use the one from ARG * keep same formatting as source * change proposed by @miguelvr * Update INSTALL.md --- INSTALL.md | 8 ++++++-- demo/README.md | 3 ++- docker/Dockerfile | 2 ++ setup.py | 2 +- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 8365be8f9..4db4b5bb6 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -58,13 +58,17 @@ unset INSTALL_DIR ### Option 2: Docker Image (Requires CUDA, Linux only) -Build image with defaults (`CUDA=9.0`, `CUDNN=7`): +Build image with defaults (`CUDA=9.0`, `CUDNN=7`, `FORCE_CUDA=1`): nvidia-docker build -t maskrcnn-benchmark docker/ Build image with other CUDA and CUDNN versions: - nvidia-docker build -t maskrcnn-benchmark --build-arg CUDA=9.2 --build-arg CUDNN=7 docker/ + nvidia-docker build -t maskrcnn-benchmark --build-arg CUDA=9.2 --build-arg CUDNN=7 docker/ + +Build image with FORCE_CUDA disabled: + + nvidia-docker build -t maskrcnn-benchmark --build-arg FORCE_CUDA=0 docker/ Build and run image with built-in jupyter notebook(note that the password is used to log in jupyter notebook): diff --git a/demo/README.md b/demo/README.md index 393a064b0..5926f8d35 100644 --- a/demo/README.md +++ b/demo/README.md @@ -38,7 +38,8 @@ docker run --rm -it \ -v /tmp/.X11-unix:/tmp/.X11-unix \ --device=/dev/video0:/dev/video0 \ --ipc=host maskrcnn-benchmark \ - python demo/webcam.py --min-image-size 300 + python demo/webcam.py --min-image-size 300 \ + --config-file configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml ``` **DISCLAIMER:** *This was tested for an Ubuntu 16.04 machine, diff --git a/docker/Dockerfile b/docker/Dockerfile index 58b924cf4..ba92f2215 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -47,6 +47,8 @@ RUN git clone https://github.com/cocodataset/cocoapi.git \ && python setup.py build_ext install # install PyTorch Detection +ARG FORCE_CUDA="1" +ENV FORCE_CUDA=${FORCE_CUDA} RUN git clone https://github.com/facebookresearch/maskrcnn-benchmark.git \ && cd maskrcnn-benchmark \ && python setup.py build develop diff --git a/setup.py b/setup.py index bfb6845e5..837c2cd15 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ def get_extensions(): extra_compile_args = {"cxx": []} define_macros = [] - if torch.cuda.is_available() and CUDA_HOME is not None: + if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": extension = CUDAExtension sources += source_cuda define_macros += [("WITH_CUDA", None)] 
From f55844c9aeb5e4d5e6b148c30cd87c64895dd268 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yihui=20He=20=E4=BD=95=E5=AE=9C=E6=99=96?= Date: Tue, 2 Apr 2019 04:38:41 -0400 Subject: [PATCH 15/17] fix resnet.py typo (#626) --- maskrcnn_benchmark/modeling/backbone/resnet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maskrcnn_benchmark/modeling/backbone/resnet.py b/maskrcnn_benchmark/modeling/backbone/resnet.py index 15d96720c..aaa438026 100644 --- a/maskrcnn_benchmark/modeling/backbone/resnet.py +++ b/maskrcnn_benchmark/modeling/backbone/resnet.py @@ -33,7 +33,7 @@ "StageSpec", [ "index", # Index of the stage, eg 1, 2, ..,. 5 - "block_count", # Numer of residual blocks in the stage + "block_count", # Number of residual blocks in the stage "return_features", # True => return the last feature map from this stage ], ) From 5c44ca7414b5c744aeda6d8bfb60d1de6d99c049 Mon Sep 17 00:00:00 2001 From: Zhang Liliang Date: Thu, 4 Apr 2019 22:19:53 +0800 Subject: [PATCH 16/17] Fix a bug in Docker file (#637) Fix a bug. Remove the echo command in line 36: RUN conda install pytorch-nightly cudatoolkit=${CUDA} -c pytorch To enable conda installation of pytorch-nightly. --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index ba92f2215..ebb1502e4 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -33,7 +33,7 @@ RUN pip install ninja yacs cython matplotlib opencv-python # Install PyTorch 1.0 Nightly ARG CUDA -RUN echo conda install pytorch-nightly cudatoolkit=${CUDA} -c pytorch \ +RUN conda install pytorch-nightly cudatoolkit=${CUDA} -c pytorch \ && conda clean -ya # Install TorchVision master From f917a555bc422ed5e06a402e739da0e21b00d0b5 Mon Sep 17 00:00:00 2001 From: Zhang Liliang Date: Fri, 5 Apr 2019 02:31:55 +0800 Subject: [PATCH 17/17] Add tqdm package in Dockerfile (#638) add tqdm in line 32: RUN pip install ninja yacs cython matplotlib opencv-python tqdm --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index ebb1502e4..11a2d370a 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -29,7 +29,7 @@ ENV PATH=$CONDA_PREFIX/bin:$PATH ENV CONDA_AUTO_UPDATE_CONDA=false RUN conda install -y ipython -RUN pip install ninja yacs cython matplotlib opencv-python +RUN pip install ninja yacs cython matplotlib opencv-python tqdm # Install PyTorch 1.0 Nightly ARG CUDA