-
Notifications
You must be signed in to change notification settings - Fork 2.5k
Add RetinaNet Implementation #102
Changes from 28 commits
e185c79
6167fa4
99920af
9e82436
69d5d3a
587ccd8
89e35b2
bd9a817
882655a
b5ca053
a1f7365
6cc7264
dca2453
ce06ecd
615af53
a859b1e
2e9881f
21a84e7
ee7760f
adb25d6
b84ff0e
b3af003
911196f
5fc4b75
c8c4bc7
cfe06d8
bac17d6
9e2baa7
a8c919a
190e132
8bab238
51bbb17
b328d22
0dac79d
328ea98
0177419
128c491
dc73a33
a51abdc
5c7b391
bec7cc1
77e3626
a8ba755
dbbb6f9
997ae29
8c93b1d
fb3fe10
096f0d6
da19923
afd5f0b
c235307
6389130
26c707e
45372c4
3bb285d
37e9075
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
MODEL: | ||
META_ARCHITECTURE: "GeneralizedRCNN" | ||
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101" | ||
RPN_ONLY: True | ||
RETINANET_ON: True | ||
BACKBONE: | ||
CONV_BODY: "R-101-FPN-RETINANET" | ||
OUT_CHANNELS: 256 | ||
RPN: | ||
USE_FPN: True | ||
FG_IOU_THRESHOLD: 0.5 | ||
BG_IOU_THRESHOLD: 0.4 | ||
ANCHOR_STRIDE: (4, 8, 16, 32, 64) | ||
PRE_NMS_TOP_N_TRAIN: 2000 | ||
PRE_NMS_TOP_N_TEST: 1000 | ||
POST_NMS_TOP_N_TEST: 1000 | ||
FPN_POST_NMS_TOP_N_TEST: 1000 | ||
ROI_HEADS: | ||
USE_FPN: True | ||
BATCH_SIZE_PER_IMAGE: 256 | ||
ROI_BOX_HEAD: | ||
POOLER_RESOLUTION: 7 | ||
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) | ||
POOLER_SAMPLING_RATIO: 2 | ||
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" | ||
PREDICTOR: "FPNPredictor" | ||
RETINANET: | ||
SCALES_PER_OCTAVE: 3 | ||
STRADDLE_THRESH: -1 | ||
DATASETS: | ||
TRAIN: ("coco_2017_train",) | ||
TEST: ("coco_2017_val",) | ||
INPUT: | ||
MIN_SIZE_TRAIN: (800, ) | ||
MAX_SIZE_TRAIN: 1333 | ||
MIN_SIZE_TEST: 800 | ||
MAX_SIZE_TEST: 1333 | ||
DATALOADER: | ||
SIZE_DIVISIBILITY: 32 | ||
SOLVER: | ||
# Assume 4 gpus | ||
BASE_LR: 0.005 | ||
WEIGHT_DECAY: 0.0001 | ||
STEPS: (120000, 160000) | ||
MAX_ITER: 180000 | ||
IMS_PER_BATCH: 8 | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
MODEL: | ||
META_ARCHITECTURE: "GeneralizedRCNN" | ||
WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50" | ||
RPN_ONLY: True | ||
RETINANET_ON: True | ||
BACKBONE: | ||
CONV_BODY: "R-50-FPN-RETINANET" | ||
OUT_CHANNELS: 256 | ||
RPN: | ||
USE_FPN: True | ||
FG_IOU_THRESHOLD: 0.5 | ||
BG_IOU_THRESHOLD: 0.4 | ||
ANCHOR_STRIDE: (4, 8, 16, 32, 64) | ||
PRE_NMS_TOP_N_TRAIN: 2000 | ||
PRE_NMS_TOP_N_TEST: 1000 | ||
POST_NMS_TOP_N_TEST: 1000 | ||
FPN_POST_NMS_TOP_N_TEST: 1000 | ||
ROI_HEADS: | ||
USE_FPN: True | ||
BATCH_SIZE_PER_IMAGE: 256 | ||
ROI_BOX_HEAD: | ||
POOLER_RESOLUTION: 7 | ||
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) | ||
POOLER_SAMPLING_RATIO: 2 | ||
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" | ||
PREDICTOR: "FPNPredictor" | ||
RETINANET: | ||
SCALES_PER_OCTAVE: 3 | ||
STRADDLE_THRESH: -1 | ||
DATASETS: | ||
TRAIN: ("coco_2017_train",) | ||
TEST: ("coco_2017_val",) | ||
INPUT: | ||
MIN_SIZE_TRAIN: (800,) | ||
MAX_SIZE_TRAIN: 1333 | ||
MIN_SIZE_TEST: 800 | ||
MAX_SIZE_TEST: 1333 | ||
DATALOADER: | ||
SIZE_DIVISIBILITY: 32 | ||
SOLVER: | ||
# Assume 4 gpus | ||
BASE_LR: 0.01 | ||
WEIGHT_DECAY: 0.0001 | ||
STEPS: (60000, 80000) | ||
MAX_ITER: 90000 | ||
IMS_PER_BATCH: 16 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
MODEL: | ||
META_ARCHITECTURE: "GeneralizedRCNN" | ||
WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d" | ||
RPN_ONLY: True | ||
RETINANET_ON: True | ||
BACKBONE: | ||
CONV_BODY: "R-101-FPN-RETINANET" | ||
OUT_CHANNELS: 256 | ||
RPN: | ||
USE_FPN: True | ||
FG_IOU_THRESHOLD: 0.5 | ||
BG_IOU_THRESHOLD: 0.4 | ||
ANCHOR_STRIDE: (4, 8, 16, 32, 64) | ||
PRE_NMS_TOP_N_TRAIN: 2000 | ||
PRE_NMS_TOP_N_TEST: 1000 | ||
POST_NMS_TOP_N_TEST: 1000 | ||
FPN_POST_NMS_TOP_N_TEST: 1000 | ||
ROI_HEADS: | ||
USE_FPN: True | ||
BATCH_SIZE_PER_IMAGE: 256 | ||
ROI_BOX_HEAD: | ||
POOLER_RESOLUTION: 7 | ||
POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125) | ||
POOLER_SAMPLING_RATIO: 2 | ||
FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" | ||
PREDICTOR: "FPNPredictor" | ||
RESNETS: | ||
STRIDE_IN_1X1: False | ||
NUM_GROUPS: 32 | ||
WIDTH_PER_GROUP: 8 | ||
RETINANET: | ||
SCALES_PER_OCTAVE: 3 | ||
STRADDLE_THRESH: -1 | ||
DATASETS: | ||
TRAIN: ("coco_2017_train",) | ||
TEST: ("coco_2017_val",) | ||
INPUT: | ||
MIN_SIZE_TRAIN: (800, ) | ||
MAX_SIZE_TRAIN: 1333 | ||
MIN_SIZE_TEST: 800 | ||
MAX_SIZE_TEST: 1333 | ||
DATALOADER: | ||
SIZE_DIVISIBILITY: 32 | ||
SOLVER: | ||
# Assume 4 gpus | ||
BASE_LR: 0.005 | ||
WEIGHT_DECAY: 0.0001 | ||
STEPS: (120000, 160000) | ||
MAX_ITER: 180000 | ||
IMS_PER_BATCH: 8 | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,7 @@ | |
_C.MODEL = CN() | ||
_C.MODEL.RPN_ONLY = False | ||
_C.MODEL.MASK_ON = False | ||
_C.MODEL.RETINANET_ON = False | ||
_C.MODEL.DEVICE = "cuda" | ||
_C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN" | ||
|
||
|
@@ -37,7 +38,7 @@ | |
# ----------------------------------------------------------------------------- | ||
_C.INPUT = CN() | ||
# Size of the smallest side of the image during training | ||
_C.INPUT.MIN_SIZE_TRAIN = 800 # (800,) | ||
_C.INPUT.MIN_SIZE_TRAIN = (800,) # 800 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We are currently not using this I believe, and it conflicts with the changes in the Keypoints. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am totally okay with that. |
||
# Maximum size of the side of the image during training | ||
_C.INPUT.MAX_SIZE_TRAIN = 1333 | ||
# Size of the smallest side of the image during testing | ||
|
@@ -223,6 +224,64 @@ | |
_C.MODEL.RESNETS.RES2_OUT_CHANNELS = 256 | ||
_C.MODEL.RESNETS.STEM_OUT_CHANNELS = 64 | ||
|
||
|
||
# ---------------------------------------------------------------------------- # | ||
# RetinaNet Options (Follow the Detectron version) | ||
# ---------------------------------------------------------------------------- # | ||
_C.MODEL.RETINANET = CN() | ||
|
||
# This is the total number of classes, including the background class (80 COCO classes + 1). | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This comment seems strange given that we have 80 classes in COCO There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, do we need to keep in two different places the number of classes? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, the comment is wrong. For the second question, definitely, one place is much better. But I would like it is under MODEL instead of MODEL.ROI_BOX_HEAD.NUM_CLASSES. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh yes, Also, I haven't looked closely at the retinanet implementation of Detectron, but don't we have a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think MODEL.NUM_CLASSES would be a good choice. But it needs changing many parts of current Faster/Mask R-CNN. I think it would make this PR be too complicated. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd rather keep the Thanks for the explanation on the second question! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that makes sense. |
||
_C.MODEL.RETINANET.NUM_CLASSES = 81 | ||
|
||
# Anchor sizes, aspect ratios, strides and straddle threshold to use | ||
_C.MODEL.RETINANET.ANCHOR_SIZES = (32, 64, 128, 256, 512) | ||
_C.MODEL.RETINANET.ASPECT_RATIOS = (0.5, 1.0, 2.0) | ||
_C.MODEL.RETINANET.ANCHOR_STRIDES = (8, 16, 32, 64, 128) | ||
_C.MODEL.RETINANET.STRADDLE_THRESH = 0 | ||
|
||
# Anchor scales per octave | ||
_C.MODEL.RETINANET.OCTAVE = 2.0 | ||
_C.MODEL.RETINANET.SCALES_PER_OCTAVE = 3 | ||
|
||
# Convolutions to use in the cls and bbox tower | ||
# NOTE: this doesn't include the last conv for logits | ||
_C.MODEL.RETINANET.NUM_CONVS = 4 | ||
|
||
# Weight for bbox_regression loss | ||
_C.MODEL.RETINANET.BBOX_REG_WEIGHT = 1.0 | ||
|
||
# Smooth L1 loss beta for bbox regression | ||
_C.MODEL.RETINANET.BBOX_REG_BETA = 0.11 | ||
|
||
# During inference, #locs to select based on cls score before NMS is performed | ||
# per FPN level | ||
_C.MODEL.RETINANET.PRE_NMS_TOP_N = 1000 | ||
|
||
# IoU overlap ratio for labeling an anchor as positive | ||
# Anchors with >= iou overlap are labeled positive | ||
_C.MODEL.RETINANET.POSITIVE_OVERLAP = 0.5 | ||
|
||
# IoU overlap ratio for labeling an anchor as negative | ||
# Anchors with < iou overlap are labeled negative | ||
_C.MODEL.RETINANET.NEGATIVE_OVERLAP = 0.4 | ||
|
||
# Focal loss parameter: alpha | ||
_C.MODEL.RETINANET.LOSS_ALPHA = 0.25 | ||
|
||
# Focal loss parameter: gamma | ||
_C.MODEL.RETINANET.LOSS_GAMMA = 2.0 | ||
|
||
# Prior prob for the positives at the beginning of training. This is used to set | ||
# the bias init for the logits layer | ||
_C.MODEL.RETINANET.PRIOR_PROB = 0.01 | ||
|
||
# Inference cls score threshold, anchors with score > INFERENCE_TH are | ||
# considered for inference | ||
_C.MODEL.RETINANET.INFERENCE_TH = 0.05 | ||
|
||
# NMS threshold used in RetinaNet | ||
_C.MODEL.RETINANET.NMS_TH = 0.4 | ||
|
||
# ---------------------------------------------------------------------------- # | ||
# Solver | ||
# ---------------------------------------------------------------------------- # | ||
|
@@ -261,6 +320,8 @@ | |
# This is global, so if we have 8 GPUs and IMS_PER_BATCH = 16, each GPU will | ||
# see 2 images per batch | ||
_C.TEST.IMS_PER_BATCH = 8 | ||
# Number of detections per image | ||
_C.TEST.DETECTIONS_PER_IMG = 100 | ||
|
||
|
||
# ---------------------------------------------------------------------------- # | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
#pragma once | ||
|
||
#include "cpu/vision.h" | ||
|
||
#ifdef WITH_CUDA | ||
#include "cuda/vision.h" | ||
#endif | ||
|
||
// Interface for Python | ||
// Forward entry point for the sigmoid focal loss. Dispatches on the device of | ||
// `logits` and forwards every argument unchanged to the CUDA implementation. | ||
// | ||
// logits, targets: input tensors; their expected shapes and dtypes are set by | ||
//   the CUDA implementation and are not visible here -- TODO confirm. | ||
// num_classes, gamma, alpha: passed through as-is; gamma and alpha are | ||
//   presumably the focal-loss parameters -- confirm against the CUDA kernel. | ||
// | ||
// Returns the tensor produced by SigmoidFocalLoss_forward_cuda. | ||
// Reports an error through AT_ERROR when `logits` is a CUDA tensor but the | ||
// build was made without WITH_CUDA, or when `logits` is not a CUDA tensor | ||
// (no CPU implementation is provided). | ||
// | ||
// NOTE(review): this is a non-inline function definition in a `#pragma once` | ||
// header; including it from more than one translation unit would produce | ||
// duplicate definitions -- confirm it is included exactly once. | ||
at::Tensor SigmoidFocalLoss_forward( | ||
const at::Tensor& logits, | ||
const at::Tensor& targets, | ||
const int num_classes, | ||
const float gamma, | ||
const float alpha) { | ||
// Only the device of `logits` is inspected; `targets` is not checked here. | ||
if (logits.type().is_cuda()) { | ||
#ifdef WITH_CUDA | ||
return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha); | ||
#else | ||
AT_ERROR("Not compiled with GPU support"); | ||
#endif | ||
} | ||
// Non-CUDA tensors fall through to here: there is no CPU kernel to call. | ||
AT_ERROR("Not implemented on the CPU"); | ||
} | ||
|
||
// Backward entry point for the sigmoid focal loss. Dispatches on the device of | ||
// `logits` and forwards every argument unchanged to the CUDA implementation. | ||
// | ||
// logits, targets: the tensors given to the forward pass (presumably the same | ||
//   ones -- verify against the caller). | ||
// d_losses: presumably the incoming gradient with respect to the losses -- | ||
//   TODO confirm against the CUDA kernel. | ||
// num_classes, gamma, alpha: passed through as-is. | ||
// | ||
// Returns the tensor produced by SigmoidFocalLoss_backward_cuda. | ||
// Reports an error through AT_ERROR when `logits` is a CUDA tensor but the | ||
// build was made without WITH_CUDA, or when `logits` is not a CUDA tensor | ||
// (no CPU implementation is provided). | ||
// | ||
// NOTE(review): this is a non-inline function definition in a `#pragma once` | ||
// header; including it from more than one translation unit would produce | ||
// duplicate definitions -- confirm it is included exactly once. | ||
at::Tensor SigmoidFocalLoss_backward( | ||
const at::Tensor& logits, | ||
const at::Tensor& targets, | ||
const at::Tensor& d_losses, | ||
const int num_classes, | ||
const float gamma, | ||
const float alpha) { | ||
// Only the device of `logits` is inspected; the other tensors are not checked here. | ||
if (logits.type().is_cuda()) { | ||
#ifdef WITH_CUDA | ||
return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); | ||
#else | ||
AT_ERROR("Not compiled with GPU support"); | ||
#endif | ||
} | ||
// Non-CUDA tensors fall through to here: there is no CPU kernel to call. | ||
AT_ERROR("Not implemented on the CPU"); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think “RPN_ONLY” should be False, because this option may affect the evaluation process.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi, I think this does not affect anything in the evaluation. RPN_ONLY is not used once RETINANET_ON is set to True.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I really did get some errors.
The problem may occur here:
maskrcnn-benchmark/maskrcnn_benchmark/data/datasets/evaluation/coco/coco_eval.py
Line 24 in 0dfac37
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for letting me know. I am on vacation now but will check it once I am back.