diff --git a/.gitignore b/.gitignore
index d52f92b83..7fe9a046b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,6 +25,9 @@ dist/
 # Pycharm editor settings
 .idea
 
+# vscode editor settings
+.vscode
+
 # MacOS
 .DS_Store
 
diff --git a/docker/docker-jupyter/jupyter_notebook_config.py b/docker/docker-jupyter/jupyter_notebook_config.py
index bd5494812..e8fbe7de4 100644
--- a/docker/docker-jupyter/jupyter_notebook_config.py
+++ b/docker/docker-jupyter/jupyter_notebook_config.py
@@ -1,7 +1,7 @@
 import os
 from IPython.lib import passwd
 
-#c = c  # pylint:disable=undefined-variable
+# c = c  # pylint:disable=undefined-variable
 c = get_config()
 c.NotebookApp.ip = '0.0.0.0'
 c.NotebookApp.port = int(os.getenv('PORT', 8888))
@@ -9,10 +9,10 @@
 
 # sets a password if PASSWORD is set in the environment
 if 'PASSWORD' in os.environ:
-  password = os.environ['PASSWORD']
-  if password:
-    c.NotebookApp.password = passwd(password)
-  else:
-    c.NotebookApp.password = ''
-    c.NotebookApp.token = ''
-  del os.environ['PASSWORD']
+    password = os.environ['PASSWORD']
+    if password:
+        c.NotebookApp.password = passwd(password)
+    else:
+        c.NotebookApp.password = ''
+        c.NotebookApp.token = ''
+    del os.environ['PASSWORD']
diff --git a/maskrcnn_benchmark/data/datasets/voc.py b/maskrcnn_benchmark/data/datasets/voc.py
index 459985bd1..ad20a8721 100644
--- a/maskrcnn_benchmark/data/datasets/voc.py
+++ b/maskrcnn_benchmark/data/datasets/voc.py
@@ -89,7 +89,7 @@ def _preprocess_annotation(self, target):
         gt_classes = []
         difficult_boxes = []
         TO_REMOVE = 1
-        
+
         for obj in target.iter("object"):
             difficult = int(obj.find("difficult").text) == 1
             if not self.keep_difficult and difficult:
@@ -99,9 +99,9 @@ def _preprocess_annotation(self, target):
             # Make pixel indexes 0-based
             # Refer to "https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/pascal_voc.py#L208-L211"
             box = [
-                bb.find("xmin").text, 
-                bb.find("ymin").text, 
-                bb.find("xmax").text, 
+                bb.find("xmin").text,
+                bb.find("ymin").text,
+                bb.find("xmax").text,
                 bb.find("ymax").text,
             ]
             bndbox = tuple(
diff --git a/maskrcnn_benchmark/layers/dcn/__init__.py b/maskrcnn_benchmark/layers/dcn/__init__.py
index 22fe18ff3..bb5af25d4 100644
--- a/maskrcnn_benchmark/layers/dcn/__init__.py
+++ b/maskrcnn_benchmark/layers/dcn/__init__.py
@@ -1,3 +1,3 @@
-# 
+#
 # Copied From [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/mmdet/ops/dcn)
-# 
\ No newline at end of file
+#
diff --git a/maskrcnn_benchmark/layers/dcn/deform_conv_func.py b/maskrcnn_benchmark/layers/dcn/deform_conv_func.py
index a276a05fe..388bacf12 100644
--- a/maskrcnn_benchmark/layers/dcn/deform_conv_func.py
+++ b/maskrcnn_benchmark/layers/dcn/deform_conv_func.py
@@ -10,15 +10,15 @@ class DeformConvFunction(Function):
 
     @staticmethod
     def forward(
-        ctx, 
-        input, 
-        offset, 
+        ctx,
+        input,
+        offset,
         weight,
-        stride=1, 
-        padding=0, 
-        dilation=1, 
-        groups=1, 
-        deformable_groups=1, 
+        stride=1,
+        padding=0,
+        dilation=1,
+        groups=1,
+        deformable_groups=1,
         im2col_step=64
     ):
         if input is not None and input.dim() != 4:
@@ -47,21 +47,21 @@ def forward(
             assert (input.shape[0] %
                     cur_im2col_step) == 0, 'im2col step must divide batchsize'
             _C.deform_conv_forward(
-                input, 
-                weight, 
-                offset, 
-                output, 
-                ctx.bufs_[0], 
+                input,
+                weight,
+                offset,
+                output,
+                ctx.bufs_[0],
                 ctx.bufs_[1],
-                weight.size(3), 
-                weight.size(2), 
-                ctx.stride[1], 
+                weight.size(3),
+                weight.size(2),
+                ctx.stride[1],
                 ctx.stride[0],
-                ctx.padding[1], 
-                ctx.padding[0], 
+                ctx.padding[1],
+                ctx.padding[0],
                 ctx.dilation[1],
-                ctx.dilation[0], 
-                ctx.groups, 
+                ctx.dilation[0],
+                ctx.groups,
                 ctx.deformable_groups,
                 cur_im2col_step
             )
@@ -85,22 +85,22 @@ def backward(ctx, grad_output):
                 grad_input = torch.zeros_like(input)
                 grad_offset = torch.zeros_like(offset)
                 _C.deform_conv_backward_input(
-                    input, 
-                    offset, 
-                    grad_output, 
+                    input,
+                    offset,
+                    grad_output,
                     grad_input,
-                    grad_offset, 
-                    weight, 
-                    ctx.bufs_[0], 
+                    grad_offset,
+                    weight,
+                    ctx.bufs_[0],
                     weight.size(3),
-                    weight.size(2), 
-                    ctx.stride[1], 
+                    weight.size(2),
+                    ctx.stride[1],
                     ctx.stride[0],
-                    ctx.padding[1], 
-                    ctx.padding[0], 
+                    ctx.padding[1],
+                    ctx.padding[0],
                     ctx.dilation[1],
-                    ctx.dilation[0], 
-                    ctx.groups, 
+                    ctx.dilation[0],
+                    ctx.groups,
                     ctx.deformable_groups,
                     cur_im2col_step
                 )
@@ -108,22 +108,22 @@ def backward(ctx, grad_output):
             if ctx.needs_input_grad[2]:
                 grad_weight = torch.zeros_like(weight)
                 _C.deform_conv_backward_parameters(
-                    input, 
-                    offset, 
+                    input,
+                    offset,
                     grad_output,
-                    grad_weight, 
-                    ctx.bufs_[0], 
-                    ctx.bufs_[1], 
+                    grad_weight,
+                    ctx.bufs_[0],
+                    ctx.bufs_[1],
                     weight.size(3),
-                    weight.size(2), 
-                    ctx.stride[1], 
+                    weight.size(2),
+                    ctx.stride[1],
                     ctx.stride[0],
-                    ctx.padding[1], 
-                    ctx.padding[0], 
+                    ctx.padding[1],
+                    ctx.padding[0],
                     ctx.dilation[1],
-                    ctx.dilation[0], 
-                    ctx.groups, 
-                    ctx.deformable_groups, 
+                    ctx.dilation[0],
+                    ctx.groups,
+                    ctx.deformable_groups,
                     1,
                     cur_im2col_step
                 )
@@ -180,24 +180,24 @@ def forward(
             ModulatedDeformConvFunction._infer_shape(ctx, input, weight))
         ctx._bufs = [input.new_empty(0), input.new_empty(0)]
         _C.modulated_deform_conv_forward(
-            input, 
-            weight, 
-            bias, 
-            ctx._bufs[0], 
-            offset, 
-            mask, 
+            input,
+            weight,
+            bias,
+            ctx._bufs[0],
+            offset,
+            mask,
             output,
-            ctx._bufs[1], 
-            weight.shape[2], 
-            weight.shape[3], 
+            ctx._bufs[1],
+            weight.shape[2],
+            weight.shape[3],
+            ctx.stride,
             ctx.stride,
-            ctx.stride, 
-            ctx.padding, 
-            ctx.padding, 
-            ctx.dilation, 
+            ctx.padding,
+            ctx.padding,
             ctx.dilation,
-            ctx.groups, 
-            ctx.deformable_groups, 
+            ctx.dilation,
+            ctx.groups,
+            ctx.deformable_groups,
             ctx.with_bias
         )
         return output
@@ -214,29 +214,29 @@ def backward(ctx, grad_output):
         grad_weight = torch.zeros_like(weight)
         grad_bias = torch.zeros_like(bias)
         _C.modulated_deform_conv_backward(
-            input, 
-            weight, 
-            bias, 
-            ctx._bufs[0], 
-            offset, 
-            mask, 
+            input,
+            weight,
+            bias,
+            ctx._bufs[0],
+            offset,
+            mask,
             ctx._bufs[1],
-            grad_input, 
-            grad_weight, 
-            grad_bias, 
-            grad_offset, 
+            grad_input,
+            grad_weight,
+            grad_bias,
+            grad_offset,
             grad_mask,
-            grad_output, 
-            weight.shape[2], 
-            weight.shape[3], 
+            grad_output,
+            weight.shape[2],
+            weight.shape[3],
+            ctx.stride,
             ctx.stride,
-            ctx.stride, 
-            ctx.padding, 
-            ctx.padding, 
-            ctx.dilation, 
+            ctx.padding,
+            ctx.padding,
+            ctx.dilation,
             ctx.dilation,
-            ctx.groups, 
-            ctx.deformable_groups, 
+            ctx.groups,
+            ctx.deformable_groups,
             ctx.with_bias
         )
         if not ctx.with_bias:
diff --git a/maskrcnn_benchmark/layers/dcn/deform_pool_func.py b/maskrcnn_benchmark/layers/dcn/deform_pool_func.py
index 2f7810b23..e083b002e 100644
--- a/maskrcnn_benchmark/layers/dcn/deform_pool_func.py
+++ b/maskrcnn_benchmark/layers/dcn/deform_pool_func.py
@@ -39,18 +39,18 @@ def forward(
         output = data.new_empty(n, out_channels, out_size, out_size)
         output_count = data.new_empty(n, out_channels, out_size, out_size)
         _C.deform_psroi_pooling_forward(
-            data, 
-            rois, 
-            offset, 
-            output, 
-            output_count, 
+            data,
+            rois,
+            offset,
+            output,
+            output_count,
             ctx.no_trans,
-            ctx.spatial_scale, 
-            ctx.out_channels, 
-            ctx.group_size, 
+            ctx.spatial_scale,
+            ctx.out_channels,
+            ctx.group_size,
             ctx.out_size,
-            ctx.part_size, 
-            ctx.sample_per_part, 
+            ctx.part_size,
+            ctx.sample_per_part,
             ctx.trans_std
         )
 
@@ -73,19 +73,19 @@ def backward(ctx, grad_output):
         grad_offset = torch.zeros_like(offset)
 
         _C.deform_psroi_pooling_backward(
-            grad_output, 
-            data, 
-            rois, 
-            offset, 
-            output_count, 
+            grad_output,
+            data,
+            rois,
+            offset,
+            output_count,
             grad_input,
-            grad_offset, 
-            ctx.no_trans, 
-            ctx.spatial_scale, 
+            grad_offset,
+            ctx.no_trans,
+            ctx.spatial_scale,
             ctx.out_channels,
-            ctx.group_size, 
-            ctx.out_size, 
-            ctx.part_size, 
+            ctx.group_size,
+            ctx.out_size,
+            ctx.part_size,
             ctx.sample_per_part,
             ctx.trans_std
         )
diff --git a/maskrcnn_benchmark/layers/misc.py b/maskrcnn_benchmark/layers/misc.py
index b64f23840..871132419 100644
--- a/maskrcnn_benchmark/layers/misc.py
+++ b/maskrcnn_benchmark/layers/misc.py
@@ -114,12 +114,12 @@ def _output_size(dim):
 class DFConv2d(nn.Module):
     """Deformable convolutional layer"""
     def __init__(
-        self, 
-        in_channels, 
-        out_channels, 
-        with_modulated_dcn=True, 
-        kernel_size=3, 
-        stride=1, 
+        self,
+        in_channels,
+        out_channels,
+        with_modulated_dcn=True,
+        kernel_size=3,
+        stride=1,
         groups=1,
         dilation=1,
         deformable_groups=1,
@@ -156,7 +156,7 @@ def __init__(
             padding=padding,
             groups=1,
             dilation=dilation
-        )           
+        )
         for l in [self.offset,]:
             nn.init.kaiming_uniform_(l.weight, a=1)
             torch.nn.init.constant_(l.bias, 0.)
@@ -192,10 +192,10 @@ def forward(self, x):
         output_shape = [
             (i + 2 * p - (di * (k - 1) + 1)) // d + 1
             for i, p, di, k, d in zip(
-                x.shape[-2:], 
-                self.padding, 
-                self.dilation, 
-                self.kernel_size, 
+                x.shape[-2:],
+                self.padding,
+                self.dilation,
+                self.kernel_size,
                 self.stride
             )
         ]
diff --git a/maskrcnn_benchmark/modeling/backbone/resnet.py b/maskrcnn_benchmark/modeling/backbone/resnet.py
index fc02dc1e8..147531091 100644
--- a/maskrcnn_benchmark/modeling/backbone/resnet.py
+++ b/maskrcnn_benchmark/modeling/backbone/resnet.py
@@ -288,11 +288,11 @@ def __init__(
             deformable_groups = dcn_config.get("deformable_groups", 1)
             with_modulated_dcn = dcn_config.get("with_modulated_dcn", False)
             self.conv2 = DFConv2d(
-                bottleneck_channels, 
-                bottleneck_channels, 
-                with_modulated_dcn=with_modulated_dcn, 
-                kernel_size=3, 
-                stride=stride_3x3, 
+                bottleneck_channels,
+                bottleneck_channels,
+                with_modulated_dcn=with_modulated_dcn,
+                kernel_size=3,
+                stride=stride_3x3,
                 groups=num_groups,
                 dilation=dilation,
                 deformable_groups=deformable_groups,
diff --git a/maskrcnn_benchmark/modeling/make_layers.py b/maskrcnn_benchmark/modeling/make_layers.py
index 74e56b0e2..049aee6d1 100644
--- a/maskrcnn_benchmark/modeling/make_layers.py
+++ b/maskrcnn_benchmark/modeling/make_layers.py
@@ -34,29 +34,29 @@ def group_norm(out_channels, affine=True, divisor=1):
     num_groups = cfg.MODEL.GROUP_NORM.NUM_GROUPS // divisor
     eps = cfg.MODEL.GROUP_NORM.EPSILON # default: 1e-5
     return torch.nn.GroupNorm(
-        get_group_gn(out_channels, dim_per_gp, num_groups), 
-        out_channels, 
-        eps, 
+        get_group_gn(out_channels, dim_per_gp, num_groups),
+        out_channels,
+        eps,
         affine
     )
 
 
 def make_conv3x3(
-    in_channels, 
-    out_channels, 
-    dilation=1, 
-    stride=1, 
+    in_channels,
+    out_channels,
+    dilation=1,
+    stride=1,
     use_gn=False,
     use_relu=False,
     kaiming_init=True
 ):
     conv = Conv2d(
-        in_channels, 
-        out_channels, 
-        kernel_size=3, 
-        stride=stride, 
-        padding=dilation, 
-        dilation=dilation, 
+        in_channels,
+        out_channels,
+        kernel_size=3,
+        stride=stride,
+        padding=dilation,
+        dilation=dilation,
         bias=False if use_gn else True
     )
     if kaiming_init:
@@ -97,12 +97,12 @@ def make_conv(
         in_channels, out_channels, kernel_size, stride=1, dilation=1
     ):
         conv = Conv2d(
-            in_channels, 
-            out_channels, 
-            kernel_size=kernel_size, 
-            stride=stride, 
-            padding=dilation * (kernel_size - 1) // 2, 
-            dilation=dilation, 
+            in_channels,
+            out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=dilation * (kernel_size - 1) // 2,
+            dilation=dilation,
             bias=False if use_gn else True
         )
         # Caffe2 implementation uses XavierFill, which in fact
diff --git a/maskrcnn_benchmark/modeling/roi_heads/box_head/loss.py b/maskrcnn_benchmark/modeling/roi_heads/box_head/loss.py
index 9f2771d02..a1fdd2308 100644
--- a/maskrcnn_benchmark/modeling/roi_heads/box_head/loss.py
+++ b/maskrcnn_benchmark/modeling/roi_heads/box_head/loss.py
@@ -19,10 +19,10 @@ class FastRCNNLossComputation(object):
     """
 
     def __init__(
-        self, 
-        proposal_matcher, 
-        fg_bg_sampler, 
-        box_coder, 
+        self,
+        proposal_matcher,
+        fg_bg_sampler,
+        box_coder,
         cls_agnostic_bbox_reg=False
     ):
         """
@@ -184,9 +184,9 @@ def make_roi_box_loss_evaluator(cfg):
     cls_agnostic_bbox_reg = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG
 
     loss_evaluator = FastRCNNLossComputation(
-        matcher, 
-        fg_bg_sampler, 
-        box_coder, 
+        matcher,
+        fg_bg_sampler,
+        box_coder,
         cls_agnostic_bbox_reg
     )
 
diff --git a/maskrcnn_benchmark/structures/segmentation_mask.py b/maskrcnn_benchmark/structures/segmentation_mask.py
index 364d01eb5..4547398a0 100644
--- a/maskrcnn_benchmark/structures/segmentation_mask.py
+++ b/maskrcnn_benchmark/structures/segmentation_mask.py
@@ -3,7 +3,7 @@
 import torch
 import numpy as np
 from maskrcnn_benchmark.layers.misc import interpolate
-
+from maskrcnn_benchmark.utils import cv2_util
 import pycocotools.mask as mask_utils
 
 # transpose
@@ -148,7 +148,7 @@ def _findContours(self):
         masks = self.masks.detach().numpy()
         for mask in masks:
             mask = cv2.UMat(mask)
-            contour, hierarchy = cv2.findContours(
+            contour, hierarchy = cv2_util.findContours(
                 mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_L1
             )
 
diff --git a/tests/test_segmentation_mask.py b/tests/test_segmentation_mask.py
index d01ed9452..3f70ed551 100644
--- a/tests/test_segmentation_mask.py
+++ b/tests/test_segmentation_mask.py
@@ -20,13 +20,11 @@ def __init__(self, method_name='runTest'):
         self.P = SegmentationMask(poly, size, 'poly')
         self.M = SegmentationMask(poly, size, 'poly').convert('mask')
 
-
     def L1(self, A, B):
         diff = A.get_mask_tensor() - B.get_mask_tensor()
         diff = torch.sum(torch.abs(diff.float())).item()
         return diff
 
-
     def test_convert(self):
         M_hat = self.M.convert('poly').convert('mask')
         P_hat = self.P.convert('mask').convert('poly')
@@ -37,13 +35,11 @@ def test_convert(self):
         self.assertTrue(diff_mask <= 8169.)
         self.assertTrue(diff_poly <= 8169.)
 
-
     def test_crop(self):
         box = [400, 250, 500, 300] # xyxy
         diff = self.L1(self.M.crop(box), self.P.crop(box))
         self.assertTrue(diff <= 1.)
 
-
     def test_resize(self):
         new_size = 50, 25
         M_hat = self.M.resize(new_size)
@@ -55,7 +51,6 @@ def test_resize(self):
         self.assertTrue(self.M.size != M_hat.size)
         self.assertTrue(diff <= 255.)
 
-
     def test_transpose(self):
         FLIP_LEFT_RIGHT = 0
         FLIP_TOP_BOTTOM = 1