From ef177c56f853f409bad11f07ac515a175555da0f Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Fri, 20 Aug 2021 14:15:51 +0100 Subject: [PATCH 01/12] Add StochasticDepth implementation (#4301) * Adding operator. * Adding tests * switching order of `p` and `mode`. * Remove seed setting. * Replace stats import with pytest.importorskip. * Fix doc * Apply suggestions from code review Co-authored-by: Francisco Massa * Fixing indentation. * Adding operator in the documentation. * Fixing lint Co-authored-by: Francisco Massa --- docs/source/ops.rst | 2 ++ test/test_ops.py | 28 +++++++++++++++ torchvision/ops/__init__.py | 3 +- torchvision/ops/stochastic_depth.py | 56 +++++++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 torchvision/ops/stochastic_depth.py diff --git a/docs/source/ops.rst b/docs/source/ops.rst index cdebe9721c3..ecef74dd8a6 100644 --- a/docs/source/ops.rst +++ b/docs/source/ops.rst @@ -23,6 +23,7 @@ torchvision.ops .. autofunction:: ps_roi_pool .. autofunction:: deform_conv2d .. autofunction:: sigmoid_focal_loss +.. autofunction:: stochastic_depth .. autoclass:: RoIAlign .. autoclass:: PSRoIAlign @@ -31,3 +32,4 @@ torchvision.ops .. autoclass:: DeformConv2d .. autoclass:: MultiScaleRoIAlign .. autoclass:: FeaturePyramidNetwork +.. autoclass:: StochasticDepth diff --git a/test/test_ops.py b/test/test_ops.py index 5c2fc882902..c64ba1fd0bb 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1000,5 +1000,33 @@ def gen_iou_check(box, expected, tolerance=1e-4): gen_iou_check(box_tensor, expected, tolerance=0.002 if dtype == torch.float16 else 1e-3) +class TestStochasticDepth: + @pytest.mark.parametrize('p', [0.2, 0.5, 0.8]) + @pytest.mark.parametrize('mode', ["batch", "row"]) + def test_stochastic_depth(self, mode, p): + stats = pytest.importorskip("scipy.stats") + batch_size = 5 + x = torch.ones(size=(batch_size, 3, 4, 4)) + layer = ops.StochasticDepth(p=p, mode=mode).to(device=x.device, dtype=x.dtype) + layer.__repr__() + + trials = 250 + num_samples = 0 + counts = 0 + for _ in range(trials): + out = layer(x) + non_zero_count = out.sum(dim=(1, 2, 3)).nonzero().size(0) + if mode == "batch": + if non_zero_count == 0: + counts += 1 + num_samples += 1 + elif mode == "row": + counts += batch_size - non_zero_count + num_samples += batch_size + + p_value = stats.binom_test(counts, num_samples, p=p) + assert p_value > 0.0001 + + if __name__ == '__main__': pytest.main([__file__]) diff --git a/torchvision/ops/__init__.py b/torchvision/ops/__init__.py index 0ec189dbc2a..606c27abcbe 100644 --- a/torchvision/ops/__init__.py +++ b/torchvision/ops/__init__.py @@ -8,6 +8,7 @@ from .poolers import MultiScaleRoIAlign from .feature_pyramid_network import FeaturePyramidNetwork from .focal_loss import sigmoid_focal_loss +from .stochastic_depth import stochastic_depth, StochasticDepth from ._register_onnx_ops import _register_custom_op @@ -20,5 +21,5 @@ 'box_area', 'box_iou', 'generalized_box_iou', 'roi_align', 'RoIAlign', 'roi_pool', 'RoIPool', 'ps_roi_align', 'PSRoIAlign', 'ps_roi_pool', 'PSRoIPool', 'MultiScaleRoIAlign', 'FeaturePyramidNetwork', - 'sigmoid_focal_loss' + 'sigmoid_focal_loss', 'stochastic_depth', 'StochasticDepth' ] diff --git a/torchvision/ops/stochastic_depth.py b/torchvision/ops/stochastic_depth.py new file mode 100644 index 00000000000..f3338242a76 --- /dev/null +++ b/torchvision/ops/stochastic_depth.py @@ -0,0 +1,56 @@ +import torch +from torch import nn, Tensor + + +def stochastic_depth(input: Tensor, p: float, mode: str, 
training: bool = True) -> Tensor: + """ + Implements the Stochastic Depth from `"Deep Networks with Stochastic Depth" + `_ used for randomly dropping residual + branches of residual architectures. + + Args: + input (Tensor[N, ...]): The input tensor or arbitrary dimensions with the first one + being its batch i.e. a batch with ``N`` rows. + p (float): probability of the input to be zeroed. + mode (str): ``"batch"`` or ``"row"``. + ``"batch"`` randomly zeroes the entire input, ``"row"`` zeroes + randomly selected rows from the batch. + training: apply stochastic depth if is ``True``. Default: ``True`` + + Returns: + Tensor[N, ...]: The randomly zeroed tensor. + """ + if p < 0.0 or p > 1.0: + raise ValueError("drop probability has to be between 0 and 1, but got {}".format(p)) + if not training or p == 0.0: + return input + + survival_rate = 1.0 - p + if mode not in ["batch", "row"]: + raise ValueError("mode has to be either 'batch' or 'row', but got {}".format(mode)) + size = [1] * input.ndim + if mode == "row": + size[0] = input.shape[0] + noise = torch.empty(size, dtype=input.dtype, device=input.device) + noise = noise.bernoulli_(survival_rate).div_(survival_rate) + return input * noise + + +class StochasticDepth(nn.Module): + """ + See :func:`stochastic_depth`. + """ + def __init__(self, p: float, mode: str) -> None: + super().__init__() + self.p = p + self.mode = mode + + def forward(self, input: Tensor) -> Tensor: + return stochastic_depth(input, self.p, self.mode, self.training) + + def __repr__(self) -> str: + tmpstr = self.__class__.__name__ + '(' + tmpstr += 'p=' + str(self.p) + tmpstr += ', mode=' + str(self.mode) + tmpstr += ')' + return tmpstr From 7947fc8fb38b1d3a2aca03f22a2e6a3caa63f2a0 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Sun, 22 Aug 2021 20:07:08 -0700 Subject: [PATCH 02/12] Prep change for changing def branch to main (#4306) --- .circleci/build_docs/commit_docs.sh | 4 +- .circleci/config.yml | 76 ++++++++++++++--------------- .circleci/config.yml.in | 8 +-- .circleci/regenerate.py | 4 +- .github/workflows/bandit.yml | 2 +- .github/workflows/codeql.yml | 2 +- CONTRIBUTING.md | 2 +- README.rst | 4 +- android/gradle.properties | 2 +- docs/source/conf.py | 4 +- test/test_cpp_models.py | 2 +- torchvision/__init__.py | 2 +- 12 files changed, 56 insertions(+), 56 deletions(-) diff --git a/.circleci/build_docs/commit_docs.sh b/.circleci/build_docs/commit_docs.sh index b923b0edbc4..04e3538fefc 100755 --- a/.circleci/build_docs/commit_docs.sh +++ b/.circleci/build_docs/commit_docs.sh @@ -6,7 +6,7 @@ set -ex if [ "$2" == "" ]; then echo call as "$0" "" "" echo where src is the root of the built documentation git checkout and - echo branch should be "master" or "1.7" or so + echo branch should be "main" or "1.7" or so exit 1 fi @@ -20,7 +20,7 @@ git checkout gh-pages mkdir -p ./"${target}" rm -rf ./"${target}"/* cp -r "${src}/docs/build/html/"* ./"$target" -if [ "${target}" == "master" ]; then +if [ "${target}" == "main" ]; then mkdir -p ./_static rm -rf ./_static/* cp -r "${src}/docs/build/html/_static/"* ./_static diff --git a/.circleci/config.yml b/.circleci/config.yml index 03f47e473ce..66c16edfe3e 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -29,7 +29,7 @@ commands: # command: | # set -ex # BRANCH=$(git rev-parse --abbrev-ref HEAD) -# if [[ "$BRANCH" != "master" ]]; then +# if [[ "$BRANCH" != "main" ]]; then # git fetch --force origin ${CIRCLE_BRANCH}/merge:merged/${CIRCLE_BRANCH} # git checkout "merged/$CIRCLE_BRANCH" # fi @@ -936,7 +936,7 @@ jobs: 
command: | set -ex tag=${CIRCLE_TAG:1:5} - VERSION=${tag:-master} + VERSION=${tag:-main} eval "$(./conda/bin/conda shell.bash hook)" conda activate ./env pushd docs @@ -982,7 +982,7 @@ jobs: # https://circleci.com/docs/2.0/configuration-reference/#checkout set -ex tag=${CIRCLE_TAG:1:5} - target=${tag:-master} + target=${tag:-main} ~/workspace/.circleci/build_docs/commit_docs.sh ~/workspace $target @@ -1159,7 +1159,7 @@ workflows: cu_version: cpu filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.6_cpu @@ -1168,7 +1168,7 @@ workflows: cu_version: cu102 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.6_cu102 @@ -1177,7 +1177,7 @@ workflows: cu_version: cu111 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.6_cu111 @@ -1186,7 +1186,7 @@ workflows: cu_version: cu113 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.6_cu113 @@ -1195,7 +1195,7 @@ workflows: cu_version: cpu filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.7_cpu @@ -1204,7 +1204,7 @@ workflows: cu_version: cu102 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.7_cu102 @@ -1213,7 +1213,7 @@ workflows: cu_version: cu111 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.7_cu111 @@ -1222,7 +1222,7 @@ workflows: cu_version: cu113 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.7_cu113 @@ -1231,7 +1231,7 @@ workflows: cu_version: cpu filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.8_cpu @@ -1240,7 +1240,7 @@ workflows: cu_version: cu102 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.8_cu102 @@ -1249,7 +1249,7 @@ workflows: cu_version: cu111 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.8_cu111 @@ -1258,7 +1258,7 @@ workflows: cu_version: cu113 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.8_cu113 @@ -1271,7 +1271,7 @@ workflows: cu_version: cu102 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.9_cu102 @@ -1280,7 +1280,7 @@ workflows: cu_version: cu111 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.9_cu111 @@ -1413,7 +1413,7 @@ workflows: cu_version: cpu filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.6_cpu @@ -1422,7 +1422,7 @@ workflows: cu_version: cu102 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.6_cu102 @@ -1431,7 +1431,7 @@ workflows: cu_version: cu111 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.6_cu111 @@ -1440,7 +1440,7 @@ workflows: cu_version: cu113 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.6_cu113 @@ -1449,7 +1449,7 
@@ workflows: cu_version: cpu filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.7_cpu @@ -1458,7 +1458,7 @@ workflows: cu_version: cu102 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.7_cu102 @@ -1467,7 +1467,7 @@ workflows: cu_version: cu111 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.7_cu111 @@ -1476,7 +1476,7 @@ workflows: cu_version: cu113 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.7_cu113 @@ -1485,7 +1485,7 @@ workflows: cu_version: cpu filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.8_cpu @@ -1494,7 +1494,7 @@ workflows: cu_version: cu102 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.8_cu102 @@ -1503,7 +1503,7 @@ workflows: cu_version: cu111 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.8_cu111 @@ -1512,7 +1512,7 @@ workflows: cu_version: cu113 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.8_cu113 @@ -1525,7 +1525,7 @@ workflows: cu_version: cu102 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.9_cu102 @@ -1534,7 +1534,7 @@ workflows: cu_version: cu111 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.9_cu111 @@ -1609,7 +1609,7 @@ workflows: filters: branches: only: - - master + - main - nightly name: unittest_linux_gpu_py3.6 python_version: '3.6' @@ -1618,7 +1618,7 @@ workflows: filters: branches: only: - - master + - main - nightly name: unittest_linux_gpu_py3.7 python_version: '3.7' @@ -1631,7 +1631,7 @@ workflows: filters: branches: only: - - master + - main - nightly name: unittest_linux_gpu_py3.9 python_version: '3.9' @@ -1656,7 +1656,7 @@ workflows: filters: branches: only: - - master + - main - nightly name: unittest_windows_gpu_py3.6 python_version: '3.6' @@ -1665,7 +1665,7 @@ workflows: filters: branches: only: - - master + - main - nightly name: unittest_windows_gpu_py3.7 python_version: '3.7' @@ -1678,7 +1678,7 @@ workflows: filters: branches: only: - - master + - main - nightly name: unittest_windows_gpu_py3.9 python_version: '3.9' @@ -4075,7 +4075,7 @@ workflows: filters: branches: only: - - master + - main jobs: - smoke_test_docker_image_build: context: org-member diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in index 15cb7eb6a07..980332d3ea9 100644 --- a/.circleci/config.yml.in +++ b/.circleci/config.yml.in @@ -29,7 +29,7 @@ commands: # command: | # set -ex # BRANCH=$(git rev-parse --abbrev-ref HEAD) -# if [[ "$BRANCH" != "master" ]]; then +# if [[ "$BRANCH" != "main" ]]; then # git fetch --force origin ${CIRCLE_BRANCH}/merge:merged/${CIRCLE_BRANCH} # git checkout "merged/$CIRCLE_BRANCH" # fi @@ -936,7 +936,7 @@ jobs: command: | set -ex tag=${CIRCLE_TAG:1:5} - VERSION=${tag:-master} + VERSION=${tag:-main} eval "$(./conda/bin/conda shell.bash hook)" conda activate ./env pushd docs @@ -982,7 +982,7 @@ jobs: # https://circleci.com/docs/2.0/configuration-reference/#checkout set -ex tag=${CIRCLE_TAG:1:5} - target=${tag:-master} + target=${tag:-main} 
~/workspace/.circleci/build_docs/commit_docs.sh ~/workspace $target @@ -1029,7 +1029,7 @@ workflows: filters: branches: only: - - master + - main jobs: - smoke_test_docker_image_build: context: org-member diff --git a/.circleci/regenerate.py b/.circleci/regenerate.py index 157cabca433..7e2fa25cb9d 100755 --- a/.circleci/regenerate.py +++ b/.circleci/regenerate.py @@ -44,7 +44,7 @@ def build_workflows(prefix='', filter_branch=None, upload=False, indentation=6, if windows_latest_only and os_type == "win" and filter_branch is None and \ (python_version != python_versions[-1] or (cu_version not in [cu_versions[0], cu_versions[-1]])): - fb = "master" + fb = "main" if not fb and (os_type == 'linux' and cu_version == 'cpu' and btype == 'wheel' and @@ -241,7 +241,7 @@ def unittest_workflows(indentation=6): if device_type == 'gpu': if python_version != "3.8": - job['filters'] = gen_filter_branch_tree('master', 'nightly') + job['filters'] = gen_filter_branch_tree('main', 'nightly') job['cu_version'] = 'cu102' else: job['cu_version'] = 'cpu' diff --git a/.github/workflows/bandit.yml b/.github/workflows/bandit.yml index 93bae80f9bd..84200b438a9 100644 --- a/.github/workflows/bandit.yml +++ b/.github/workflows/bandit.yml @@ -4,7 +4,7 @@ name: Bandit on: pull_request: - branches: [ master ] + branches: [ main ] workflow_dispatch: diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 3c8bc96a5bd..99460b11228 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -4,7 +4,7 @@ name: CodeQL on: pull_request: - branches: [ master ] + branches: [ main ] workflow_dispatch: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 75405c94a83..55880ae5d70 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -60,7 +60,7 @@ conda install libpng jpeg If you plan to modify the code or documentation, please follow the steps below: -1. Fork the repository and create your branch from `master`. +1. Fork the repository and create your branch from `main`. 2. If you have modified the code (new feature or bug-fix), please add unit tests. 3. If you have changed APIs, update the documentation. Make sure the documentation builds. 4. Ensure the test suite passes. diff --git a/README.rst b/README.rst index 2c6daee4cc2..35cef0c19df 100644 --- a/README.rst +++ b/README.rst @@ -21,7 +21,7 @@ supported Python versions. +--------------------------+--------------------------+---------------------------------+ | ``torch`` | ``torchvision`` | ``python`` | +==========================+==========================+=================================+ -| ``master`` / ``nightly`` | ``master`` / ``nightly`` | ``>=3.6`` | +| ``master`` / ``nightly`` | ``main`` / ``nightly`` | ``>=3.6`` | +--------------------------+--------------------------+---------------------------------+ | ``1.9.0`` | ``0.10.0`` | ``>=3.6`` | +--------------------------+--------------------------+---------------------------------+ @@ -76,7 +76,7 @@ From source: In case building TorchVision from source fails, install the nightly version of PyTorch following -the linked guide on the `contributing page `_ and retry the install. +the linked guide on the `contributing page `_ and retry the install. By default, GPU support is built if CUDA is found and ``torch.cuda.is_available()`` is true. 
It's possible to force building GPU support by setting ``FORCE_CUDA=1`` environment variable, diff --git a/android/gradle.properties b/android/gradle.properties index a8105544f30..9c5b8f61212 100644 --- a/android/gradle.properties +++ b/android/gradle.properties @@ -9,7 +9,7 @@ POM_SCM_URL=https://github.com/pytorch/vision.git POM_SCM_CONNECTION=scm:git:https://github.com/pytorch/vision POM_SCM_DEV_CONNECTION=scm:git:git@github.com:pytorch/vision.git POM_LICENSE_NAME=BSD 3-Clause -POM_LICENSE_URL=https://github.com/pytorch/vision/blob/master/LICENSE +POM_LICENSE_URL=https://github.com/pytorch/vision/blob/main/LICENSE POM_ISSUES_URL=https://github.com/pytorch/vision/issues POM_LICENSE_DIST=repo POM_DEVELOPER_ID=pytorch diff --git a/docs/source/conf.py b/docs/source/conf.py index 6bbb05c13c7..e8e17edf283 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -82,10 +82,10 @@ # # The short X.Y version. # TODO: change to [:2] at v1.0 -version = 'master (' + torchvision.__version__ + ' )' +version = 'main (' + torchvision.__version__ + ' )' # The full version, including alpha/beta/rc tags. # TODO: verify this works as expected -release = 'master' +release = 'main' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/test/test_cpp_models.py b/test/test_cpp_models.py index 6deb5d79739..2307051ff60 100644 --- a/test/test_cpp_models.py +++ b/test/test_cpp_models.py @@ -45,7 +45,7 @@ def read_image2(): @unittest.skipIf( sys.platform == "darwin" or True, - "C++ models are broken on OS X at the moment, and there's a BC breakage on master; " + "C++ models are broken on OS X at the moment, and there's a BC breakage on main; " "see https://github.com/pytorch/vision/issues/1191") class Tester(unittest.TestCase): pretrained = False diff --git a/torchvision/__init__.py b/torchvision/__init__.py index 3cbdda7af7f..a5a6f568151 100644 --- a/torchvision/__init__.py +++ b/torchvision/__init__.py @@ -66,7 +66,7 @@ def set_video_backend(backend): It generally decodes faster than :mod:`pyav`, but is perhaps less robust. .. note:: - Building with FFMPEG is disabled by default in the latest master. If you want to use the 'video_reader' + Building with FFMPEG is disabled by default in the latest `main`. If you want to use the 'video_reader' backend, please compile torchvision from source. 
""" global _video_backend From 185be3a9811a9cc6dc6206a397d035be3c20649b Mon Sep 17 00:00:00 2001 From: F-G Fernandez Date: Mon, 23 Aug 2021 13:50:59 +0200 Subject: [PATCH 03/12] Added typing annotations to models/segmentation (#4227) * style: Added typing annotations to segmentation/_utils * style: Added typing annotations to segmentation/segmentation * style: Added typing annotations to remaining segmentation models * style: Fixed typing of DeepLab * style: Fixed typing * fix: Fixed typing annotations & default values * Fixing python_type_check --- torchvision/models/segmentation/_utils.py | 12 ++- torchvision/models/segmentation/deeplabv3.py | 19 ++--- torchvision/models/segmentation/fcn.py | 2 +- torchvision/models/segmentation/lraspp.py | 19 ++++- .../models/segmentation/segmentation.py | 76 +++++++++++++++---- 5 files changed, 97 insertions(+), 31 deletions(-) diff --git a/torchvision/models/segmentation/_utils.py b/torchvision/models/segmentation/_utils.py index 176b7490038..fb94b9b1528 100644 --- a/torchvision/models/segmentation/_utils.py +++ b/torchvision/models/segmentation/_utils.py @@ -1,19 +1,25 @@ from collections import OrderedDict +from typing import Optional, Dict -from torch import nn +from torch import nn, Tensor from torch.nn import functional as F class _SimpleSegmentationModel(nn.Module): __constants__ = ['aux_classifier'] - def __init__(self, backbone, classifier, aux_classifier=None): + def __init__( + self, + backbone: nn.Module, + classifier: nn.Module, + aux_classifier: Optional[nn.Module] = None + ) -> None: super(_SimpleSegmentationModel, self).__init__() self.backbone = backbone self.classifier = classifier self.aux_classifier = aux_classifier - def forward(self, x): + def forward(self, x: Tensor) -> Dict[str, Tensor]: input_shape = x.shape[-2:] # contract: features is a dict of tensors features = self.backbone(x) diff --git a/torchvision/models/segmentation/deeplabv3.py b/torchvision/models/segmentation/deeplabv3.py index 7acc013ccb1..15ab8846e7d 100644 --- a/torchvision/models/segmentation/deeplabv3.py +++ b/torchvision/models/segmentation/deeplabv3.py @@ -1,6 +1,7 @@ import torch from torch import nn from torch.nn import functional as F +from typing import List from ._utils import _SimpleSegmentationModel @@ -27,7 +28,7 @@ class DeepLabV3(_SimpleSegmentationModel): class DeepLabHead(nn.Sequential): - def __init__(self, in_channels, num_classes): + def __init__(self, in_channels: int, num_classes: int) -> None: super(DeepLabHead, self).__init__( ASPP(in_channels, [12, 24, 36]), nn.Conv2d(256, 256, 3, padding=1, bias=False), @@ -38,7 +39,7 @@ def __init__(self, in_channels, num_classes): class ASPPConv(nn.Sequential): - def __init__(self, in_channels, out_channels, dilation): + def __init__(self, in_channels: int, out_channels: int, dilation: int) -> None: modules = [ nn.Conv2d(in_channels, out_channels, 3, padding=dilation, dilation=dilation, bias=False), nn.BatchNorm2d(out_channels), @@ -48,14 +49,14 @@ def __init__(self, in_channels, out_channels, dilation): class ASPPPooling(nn.Sequential): - def __init__(self, in_channels, out_channels): + def __init__(self, in_channels: int, out_channels: int) -> None: super(ASPPPooling, self).__init__( nn.AdaptiveAvgPool2d(1), nn.Conv2d(in_channels, out_channels, 1, bias=False), nn.BatchNorm2d(out_channels), nn.ReLU()) - def forward(self, x): + def forward(self, x: torch.Tensor) -> torch.Tensor: size = x.shape[-2:] for mod in self: x = mod(x) @@ -63,7 +64,7 @@ def forward(self, x): class ASPP(nn.Module): - def 
__init__(self, in_channels, atrous_rates, out_channels=256): + def __init__(self, in_channels: int, atrous_rates: List[int], out_channels: int = 256) -> None: super(ASPP, self).__init__() modules = [] modules.append(nn.Sequential( @@ -85,9 +86,9 @@ def __init__(self, in_channels, atrous_rates, out_channels=256): nn.ReLU(), nn.Dropout(0.5)) - def forward(self, x): - res = [] + def forward(self, x: torch.Tensor) -> torch.Tensor: + _res = [] for conv in self.convs: - res.append(conv(x)) - res = torch.cat(res, dim=1) + _res.append(conv(x)) + res = torch.cat(_res, dim=1) return self.project(res) diff --git a/torchvision/models/segmentation/fcn.py b/torchvision/models/segmentation/fcn.py index 3c695b53167..9c8db1e1211 100644 --- a/torchvision/models/segmentation/fcn.py +++ b/torchvision/models/segmentation/fcn.py @@ -23,7 +23,7 @@ class FCN(_SimpleSegmentationModel): class FCNHead(nn.Sequential): - def __init__(self, in_channels, channels): + def __init__(self, in_channels: int, channels: int) -> None: inter_channels = in_channels // 4 layers = [ nn.Conv2d(in_channels, inter_channels, 3, padding=1, bias=False), diff --git a/torchvision/models/segmentation/lraspp.py b/torchvision/models/segmentation/lraspp.py index 44cd9b1e773..0e5fb5ee898 100644 --- a/torchvision/models/segmentation/lraspp.py +++ b/torchvision/models/segmentation/lraspp.py @@ -24,12 +24,19 @@ class LRASPP(nn.Module): inter_channels (int, optional): the number of channels for intermediate computations. """ - def __init__(self, backbone, low_channels, high_channels, num_classes, inter_channels=128): + def __init__( + self, + backbone: nn.Module, + low_channels: int, + high_channels: int, + num_classes: int, + inter_channels: int = 128 + ) -> None: super().__init__() self.backbone = backbone self.classifier = LRASPPHead(low_channels, high_channels, num_classes, inter_channels) - def forward(self, input): + def forward(self, input: Tensor) -> Dict[str, Tensor]: features = self.backbone(input) out = self.classifier(features) out = F.interpolate(out, size=input.shape[-2:], mode='bilinear', align_corners=False) @@ -42,7 +49,13 @@ def forward(self, input): class LRASPPHead(nn.Module): - def __init__(self, low_channels, high_channels, num_classes, inter_channels): + def __init__( + self, + low_channels: int, + high_channels: int, + num_classes: int, + inter_channels: int + ) -> None: super().__init__() self.cbr = nn.Sequential( nn.Conv2d(high_channels, inter_channels, 1, bias=False), diff --git a/torchvision/models/segmentation/segmentation.py b/torchvision/models/segmentation/segmentation.py index 0f2f14c97ba..938965e330b 100644 --- a/torchvision/models/segmentation/segmentation.py +++ b/torchvision/models/segmentation/segmentation.py @@ -1,3 +1,5 @@ +from torch import nn +from typing import Any, Optional from .._utils import IntermediateLayerGetter from ..._internally_replaced_utils import load_state_dict_from_url from .. 
import mobilenetv3 @@ -22,7 +24,13 @@ } -def _segm_model(name, backbone_name, num_classes, aux, pretrained_backbone=True): +def _segm_model( + name: str, + backbone_name: str, + num_classes: int, + aux: Optional[bool], + pretrained_backbone: bool = True +) -> nn.Module: if 'resnet' in backbone_name: backbone = resnet.__dict__[backbone_name]( pretrained=pretrained_backbone, @@ -66,7 +74,15 @@ def _segm_model(name, backbone_name, num_classes, aux, pretrained_backbone=True) return model -def _load_model(arch_type, backbone, pretrained, progress, num_classes, aux_loss, **kwargs): +def _load_model( + arch_type: str, + backbone: str, + pretrained: bool, + progress: bool, + num_classes: int, + aux_loss: Optional[bool], + **kwargs: Any +) -> nn.Module: if pretrained: aux_loss = True kwargs["pretrained_backbone"] = False @@ -76,7 +92,7 @@ def _load_model(arch_type, backbone, pretrained, progress, num_classes, aux_loss return model -def _load_weights(model, arch_type, backbone, progress): +def _load_weights(model: nn.Module, arch_type: str, backbone: str, progress: bool) -> None: arch = arch_type + '_' + backbone + '_coco' model_url = model_urls.get(arch, None) if model_url is None: @@ -86,7 +102,7 @@ def _load_weights(model, arch_type, backbone, progress): model.load_state_dict(state_dict) -def _segm_lraspp_mobilenetv3(backbone_name, num_classes, pretrained_backbone=True): +def _segm_lraspp_mobilenetv3(backbone_name: str, num_classes: int, pretrained_backbone: bool = True) -> LRASPP: backbone = mobilenetv3.__dict__[backbone_name](pretrained=pretrained_backbone, dilated=True).features # Gather the indices of blocks which are strided. These are the locations of C1, ..., Cn-1 blocks. @@ -103,8 +119,13 @@ def _segm_lraspp_mobilenetv3(backbone_name, num_classes, pretrained_backbone=Tru return model -def fcn_resnet50(pretrained=False, progress=True, - num_classes=21, aux_loss=None, **kwargs): +def fcn_resnet50( + pretrained: bool = False, + progress: bool = True, + num_classes: int = 21, + aux_loss: Optional[bool] = None, + **kwargs: Any +) -> nn.Module: """Constructs a Fully-Convolutional Network model with a ResNet-50 backbone. Args: @@ -117,8 +138,13 @@ def fcn_resnet50(pretrained=False, progress=True, return _load_model('fcn', 'resnet50', pretrained, progress, num_classes, aux_loss, **kwargs) -def fcn_resnet101(pretrained=False, progress=True, - num_classes=21, aux_loss=None, **kwargs): +def fcn_resnet101( + pretrained: bool = False, + progress: bool = True, + num_classes: int = 21, + aux_loss: Optional[bool] = None, + **kwargs: Any +) -> nn.Module: """Constructs a Fully-Convolutional Network model with a ResNet-101 backbone. Args: @@ -131,8 +157,13 @@ def fcn_resnet101(pretrained=False, progress=True, return _load_model('fcn', 'resnet101', pretrained, progress, num_classes, aux_loss, **kwargs) -def deeplabv3_resnet50(pretrained=False, progress=True, - num_classes=21, aux_loss=None, **kwargs): +def deeplabv3_resnet50( + pretrained: bool = False, + progress: bool = True, + num_classes: int = 21, + aux_loss: Optional[bool] = None, + **kwargs: Any +) -> nn.Module: """Constructs a DeepLabV3 model with a ResNet-50 backbone. 
Args: @@ -145,8 +176,13 @@ def deeplabv3_resnet50(pretrained=False, progress=True, return _load_model('deeplabv3', 'resnet50', pretrained, progress, num_classes, aux_loss, **kwargs) -def deeplabv3_resnet101(pretrained=False, progress=True, - num_classes=21, aux_loss=None, **kwargs): +def deeplabv3_resnet101( + pretrained: bool = False, + progress: bool = True, + num_classes: int = 21, + aux_loss: Optional[bool] = None, + **kwargs: Any +) -> nn.Module: """Constructs a DeepLabV3 model with a ResNet-101 backbone. Args: @@ -159,8 +195,13 @@ def deeplabv3_resnet101(pretrained=False, progress=True, return _load_model('deeplabv3', 'resnet101', pretrained, progress, num_classes, aux_loss, **kwargs) -def deeplabv3_mobilenet_v3_large(pretrained=False, progress=True, - num_classes=21, aux_loss=None, **kwargs): +def deeplabv3_mobilenet_v3_large( + pretrained: bool = False, + progress: bool = True, + num_classes: int = 21, + aux_loss: Optional[bool] = None, + **kwargs: Any +) -> nn.Module: """Constructs a DeepLabV3 model with a MobileNetV3-Large backbone. Args: @@ -173,7 +214,12 @@ def deeplabv3_mobilenet_v3_large(pretrained=False, progress=True, return _load_model('deeplabv3', 'mobilenet_v3_large', pretrained, progress, num_classes, aux_loss, **kwargs) -def lraspp_mobilenet_v3_large(pretrained=False, progress=True, num_classes=21, **kwargs): +def lraspp_mobilenet_v3_large( + pretrained: bool = False, + progress: bool = True, + num_classes: int = 21, + **kwargs: Any +) -> nn.Module: """Constructs a Lite R-ASPP Network model with a MobileNetV3-Large backbone. Args: From 11d36292b84c640b23f446b862ba4992e51b5e63 Mon Sep 17 00:00:00 2001 From: F-G Fernandez Date: Mon, 23 Aug 2021 16:51:35 +0200 Subject: [PATCH 04/12] Added typing annotations to models/video (#4229) * style: Added typing to models/video * style: Fixed typing * style: Fixed typing * style: Fixed typing * refactor: Removed default value for stem * docs: Fixed docstring of VideoResNet * style: Refactored typing * docs: Fixed docstring * style: Fixed typing * docs: Specified docstring * typing: Fixed tying * docs: Fixed docstring * Undoing change. 
--- torchvision/models/video/resnet.py | 120 +++++++++++++++++++---------- 1 file changed, 78 insertions(+), 42 deletions(-) diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py index fc69188ef7a..faf3b3bc4a8 100644 --- a/torchvision/models/video/resnet.py +++ b/torchvision/models/video/resnet.py @@ -1,4 +1,6 @@ +from torch import Tensor import torch.nn as nn +from typing import Tuple, Optional, Callable, List, Type, Any, Union from ..._internally_replaced_utils import load_state_dict_from_url @@ -13,12 +15,14 @@ class Conv3DSimple(nn.Conv3d): - def __init__(self, - in_planes, - out_planes, - midplanes=None, - stride=1, - padding=1): + def __init__( + self, + in_planes: int, + out_planes: int, + midplanes: Optional[int] = None, + stride: int = 1, + padding: int = 1 + ) -> None: super(Conv3DSimple, self).__init__( in_channels=in_planes, @@ -29,18 +33,20 @@ def __init__(self, bias=False) @staticmethod - def get_downsample_stride(stride): + def get_downsample_stride(stride: int) -> Tuple[int, int, int]: return stride, stride, stride class Conv2Plus1D(nn.Sequential): - def __init__(self, - in_planes, - out_planes, - midplanes, - stride=1, - padding=1): + def __init__( + self, + in_planes: int, + out_planes: int, + midplanes: int, + stride: int = 1, + padding: int = 1 + ) -> None: super(Conv2Plus1D, self).__init__( nn.Conv3d(in_planes, midplanes, kernel_size=(1, 3, 3), stride=(1, stride, stride), padding=(0, padding, padding), @@ -52,18 +58,20 @@ def __init__(self, bias=False)) @staticmethod - def get_downsample_stride(stride): + def get_downsample_stride(stride: int) -> Tuple[int, int, int]: return stride, stride, stride class Conv3DNoTemporal(nn.Conv3d): - def __init__(self, - in_planes, - out_planes, - midplanes=None, - stride=1, - padding=1): + def __init__( + self, + in_planes: int, + out_planes: int, + midplanes: Optional[int] = None, + stride: int = 1, + padding: int = 1 + ) -> None: super(Conv3DNoTemporal, self).__init__( in_channels=in_planes, @@ -74,7 +82,7 @@ def __init__(self, bias=False) @staticmethod - def get_downsample_stride(stride): + def get_downsample_stride(stride: int) -> Tuple[int, int, int]: return 1, stride, stride @@ -82,7 +90,14 @@ class BasicBlock(nn.Module): expansion = 1 - def __init__(self, inplanes, planes, conv_builder, stride=1, downsample=None): + def __init__( + self, + inplanes: int, + planes: int, + conv_builder: Callable[..., nn.Module], + stride: int = 1, + downsample: Optional[nn.Module] = None, + ) -> None: midplanes = (inplanes * planes * 3 * 3 * 3) // (inplanes * 3 * 3 + 3 * planes) super(BasicBlock, self).__init__() @@ -99,7 +114,7 @@ def __init__(self, inplanes, planes, conv_builder, stride=1, downsample=None): self.downsample = downsample self.stride = stride - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: residual = x out = self.conv1(x) @@ -116,7 +131,14 @@ def forward(self, x): class Bottleneck(nn.Module): expansion = 4 - def __init__(self, inplanes, planes, conv_builder, stride=1, downsample=None): + def __init__( + self, + inplanes: int, + planes: int, + conv_builder: Callable[..., nn.Module], + stride: int = 1, + downsample: Optional[nn.Module] = None, + ) -> None: super(Bottleneck, self).__init__() midplanes = (inplanes * planes * 3 * 3 * 3) // (inplanes * 3 * 3 + 3 * planes) @@ -143,7 +165,7 @@ def __init__(self, inplanes, planes, conv_builder, stride=1, downsample=None): self.downsample = downsample self.stride = stride - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: 
residual = x out = self.conv1(x) @@ -162,7 +184,7 @@ def forward(self, x): class BasicStem(nn.Sequential): """The default conv-batchnorm-relu stem """ - def __init__(self): + def __init__(self) -> None: super(BasicStem, self).__init__( nn.Conv3d(3, 64, kernel_size=(3, 7, 7), stride=(1, 2, 2), padding=(1, 3, 3), bias=False), @@ -173,7 +195,7 @@ def __init__(self): class R2Plus1dStem(nn.Sequential): """R(2+1)D stem is different than the default one as it uses separated 3D convolution """ - def __init__(self): + def __init__(self) -> None: super(R2Plus1dStem, self).__init__( nn.Conv3d(3, 45, kernel_size=(1, 7, 7), stride=(1, 2, 2), padding=(0, 3, 3), @@ -189,16 +211,23 @@ def __init__(self): class VideoResNet(nn.Module): - def __init__(self, block, conv_makers, layers, - stem, num_classes=400, - zero_init_residual=False): + def __init__( + self, + block: Type[Union[BasicBlock, Bottleneck]], + conv_makers: List[Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]]], + layers: List[int], + stem: Callable[..., nn.Module], + num_classes: int = 400, + zero_init_residual: bool = False, + ) -> None: """Generic resnet video generator. Args: - block (nn.Module): resnet building block - conv_makers (list(functions)): generator function for each layer + block (Type[Union[BasicBlock, Bottleneck]]): resnet building block + conv_makers (List[Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]]]): generator + function for each layer layers (List[int]): number of blocks per layer - stem (nn.Module, optional): Resnet stem, if None, defaults to conv-bn-relu. Defaults to None. + stem (Callable[..., nn.Module]): module specifying the ResNet stem. num_classes (int, optional): Dimension of the final FC layer. Defaults to 400. zero_init_residual (bool, optional): Zero init bottleneck residual BN. Defaults to False. 
""" @@ -221,9 +250,9 @@ def __init__(self, block, conv_makers, layers, if zero_init_residual: for m in self.modules(): if isinstance(m, Bottleneck): - nn.init.constant_(m.bn3.weight, 0) + nn.init.constant_(m.bn3.weight, 0) # type: ignore[union-attr, arg-type] - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: x = self.stem(x) x = self.layer1(x) @@ -238,7 +267,14 @@ def forward(self, x): return x - def _make_layer(self, block, conv_builder, planes, blocks, stride=1): + def _make_layer( + self, + block: Type[Union[BasicBlock, Bottleneck]], + conv_builder: Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]], + planes: int, + blocks: int, + stride: int = 1 + ) -> nn.Sequential: downsample = None if stride != 1 or self.inplanes != planes * block.expansion: @@ -257,7 +293,7 @@ def _make_layer(self, block, conv_builder, planes, blocks, stride=1): return nn.Sequential(*layers) - def _initialize_weights(self): + def _initialize_weights(self) -> None: for m in self.modules(): if isinstance(m, nn.Conv3d): nn.init.kaiming_normal_(m.weight, mode='fan_out', @@ -272,7 +308,7 @@ def _initialize_weights(self): nn.init.constant_(m.bias, 0) -def _video_resnet(arch, pretrained=False, progress=True, **kwargs): +def _video_resnet(arch: str, pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VideoResNet: model = VideoResNet(**kwargs) if pretrained: @@ -282,7 +318,7 @@ def _video_resnet(arch, pretrained=False, progress=True, **kwargs): return model -def r3d_18(pretrained=False, progress=True, **kwargs): +def r3d_18(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VideoResNet: """Construct 18 layer Resnet3D model as in https://arxiv.org/abs/1711.11248 @@ -302,7 +338,7 @@ def r3d_18(pretrained=False, progress=True, **kwargs): stem=BasicStem, **kwargs) -def mc3_18(pretrained=False, progress=True, **kwargs): +def mc3_18(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VideoResNet: """Constructor for 18 layer Mixed Convolution network as in https://arxiv.org/abs/1711.11248 @@ -316,12 +352,12 @@ def mc3_18(pretrained=False, progress=True, **kwargs): return _video_resnet('mc3_18', pretrained, progress, block=BasicBlock, - conv_makers=[Conv3DSimple] + [Conv3DNoTemporal] * 3, + conv_makers=[Conv3DSimple] + [Conv3DNoTemporal] * 3, # type: ignore[list-item] layers=[2, 2, 2, 2], stem=BasicStem, **kwargs) -def r2plus1d_18(pretrained=False, progress=True, **kwargs): +def r2plus1d_18(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VideoResNet: """Constructor for the 18 layer deep R(2+1)D network as in https://arxiv.org/abs/1711.11248 From b72129c5338cf3e888b1ce5dbdc0f4cb31a653cf Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 23 Aug 2021 19:12:22 +0200 Subject: [PATCH 05/12] fix dependency table (#4308) --- README.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/README.rst b/README.rst index 35cef0c19df..bca61ce4680 100644 --- a/README.rst +++ b/README.rst @@ -21,25 +21,25 @@ supported Python versions. 
+--------------------------+--------------------------+---------------------------------+ | ``torch`` | ``torchvision`` | ``python`` | +==========================+==========================+=================================+ -| ``master`` / ``nightly`` | ``main`` / ``nightly`` | ``>=3.6`` | +| ``main`` / ``nightly`` | ``main`` / ``nightly`` | ``>=3.6``, ``<=3.9`` | +--------------------------+--------------------------+---------------------------------+ -| ``1.9.0`` | ``0.10.0`` | ``>=3.6`` | +| ``1.9.0`` | ``0.10.0`` | ``>=3.6``, ``<=3.9`` | +--------------------------+--------------------------+---------------------------------+ -| ``1.8.1`` | ``0.9.1`` | ``>=3.6`` | +| ``1.8.1`` | ``0.9.1`` | ``>=3.6``, ``<=3.9`` | +--------------------------+--------------------------+---------------------------------+ -| ``1.8.0`` | ``0.9.0`` | ``>=3.6`` | +| ``1.8.0`` | ``0.9.0`` | ``>=3.6``, ``<=3.9`` | +--------------------------+--------------------------+---------------------------------+ -| ``1.7.1`` | ``0.8.2`` | ``>=3.6`` | +| ``1.7.1`` | ``0.8.2`` | ``>=3.6``, ``<=3.9`` | +--------------------------+--------------------------+---------------------------------+ -| ``1.7.0`` | ``0.8.1`` | ``>=3.6`` | +| ``1.7.0`` | ``0.8.1`` | ``>=3.6``, ``<=3.8`` | +--------------------------+--------------------------+---------------------------------+ -| ``1.7.0`` | ``0.8.0`` | ``>=3.6`` | +| ``1.7.0`` | ``0.8.0`` | ``>=3.6``, ``<=3.8`` | +--------------------------+--------------------------+---------------------------------+ -| ``1.6.0`` | ``0.7.0`` | ``>=3.6`` | +| ``1.6.0`` | ``0.7.0`` | ``>=3.6``, ``<=3.8`` | +--------------------------+--------------------------+---------------------------------+ -| ``1.5.1`` | ``0.6.1`` | ``>=3.5`` | +| ``1.5.1`` | ``0.6.1`` | ``>=3.5``, ``<=3.8`` | +--------------------------+--------------------------+---------------------------------+ -| ``1.5.0`` | ``0.6.0`` | ``>=3.5`` | +| ``1.5.0`` | ``0.6.0`` | ``>=3.5``, ``<=3.8`` | +--------------------------+--------------------------+---------------------------------+ | ``1.4.0`` | ``0.5.0`` | ``==2.7``, ``>=3.5``, ``<=3.8`` | +--------------------------+--------------------------+---------------------------------+ From 32f95c77f7cc6daa750221b37f14ded9973b886c Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 26 Aug 2021 09:16:20 +0100 Subject: [PATCH 06/12] [FBcode->GH] Port quantize_val and dequantize_val into torchvision to avoid at::native and android xplat incompatibility (#4311) Summary: This diff ports `quantize_val` and `dequantize_val` from at::native to torchvision because native kernels are incompatible with android xplat builds (see D30234056). This should only be temporary until we find a way to move those functions out of at::native, or until the at::native / android incompatibility disappears. 
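For reference, the two vendored helpers implement plain affine quantization. Below is a minimal Python sketch of the non-FBGEMM path that appears further down in this patch; the Python function names simply mirror the C++ ones and are illustrative, with quint8-style qmin/qmax assumed.

```python
def quantize_val(scale: float, zero_point: int, value: float,
                 qmin: int = 0, qmax: int = 255) -> int:
    # Round to nearest (ties to even), matching std::nearbyint under the
    # default rounding mode, then shift by the zero point and clamp.
    qvalue = zero_point + round(value / scale)
    return max(qmin, min(qmax, qvalue))


def dequantize_val(scale: float, zero_point: int, qvalue: int) -> float:
    # Undo the zero-point shift and rescale back to the real line.
    return (qvalue - zero_point) * scale


# e.g. with scale=0.1 and zero_point=128: 1.0 -> 138 -> 1.0
assert quantize_val(0.1, 128, 1.0) == 138
assert dequantize_val(0.1, 128, 138) == 1.0
```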
Reviewed By: fmassa Differential Revision: D30393619 fbshipit-source-id: 18b7b1b349ad9a24088a120e23da7535f7fa7ddc Co-authored-by: Nicolas Hug --- .../ops/quantized/cpu/qroi_align_kernel.cpp | 96 +++++++++++++++++-- 1 file changed, 89 insertions(+), 7 deletions(-) diff --git a/torchvision/csrc/ops/quantized/cpu/qroi_align_kernel.cpp b/torchvision/csrc/ops/quantized/cpu/qroi_align_kernel.cpp index cfd5ec4ee97..20d4f503501 100644 --- a/torchvision/csrc/ops/quantized/cpu/qroi_align_kernel.cpp +++ b/torchvision/csrc/ops/quantized/cpu/qroi_align_kernel.cpp @@ -1,5 +1,4 @@ #include -#include #include #include "../../cpu/roi_align_common.h" @@ -9,6 +8,90 @@ namespace ops { namespace { +// BEGIN copy-pasted code from pytorch core +// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/quantized/affine_quantizer_base.cpp +// We're vendoring the quantize_val() and dequantize_val() functions here. The +// reason is that these functions belong in at::native, which is incompatible +// with android xplat support. + +// FIXME: Remove this section once we can use at::native for android xplat +// builds, or when quantize_val() and dequantize_val() aren't in at::native + +#ifdef USE_FBGEMM +template +T quantize_val(double scale, int64_t zero_point, float value) { + // Internally, fbgemm::Quantize uses std::nearbyint. + // std::nearbyint results in nearest integer value according to the current + // rounding mode and the default rounding mode is rounds to even in half-way + // cases in most popular processor architectures like x86 and ARM. This is + // typically faster than an alternatives like std::round that rounds half-way + // cases away from zero, and can be consistent with SIMD implementations for + // example in x86 using _mm512_cvtps_epi32 or mm512_round_ps with + // _MM_FROUND_CUR_DIRECTION option that also follow the current rounding mode. + // NOLINTNEXTLINE(cppcoreguidelines-init-variables) + int32_t qvalue; + // NOLINTNEXTLINE(bugprone-signed-char-misuse) + qvalue = fbgemm::Quantize( + value, + static_cast(zero_point), + static_cast(scale), + /*result_precision=*/CHAR_BIT * sizeof(typename T::underlying)); + return static_cast(qvalue); +} + +template +inline float dequantize_val(double scale, int64_t zero_point, T value) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) + fbgemm::TensorQuantizationParams qparams; + qparams.scale = static_cast(scale); + qparams.zero_point = static_cast(zero_point); + return fbgemm::Dequantize(value.val_, qparams); +} +#else // USE_FBGEMM + +#if defined(__ANDROID__) && !defined(__NDK_MAJOR__) +template +inline float Round(const float x) { + return ::nearbyintf(x); +} +inline double Round(const double x) { + return ::nearbyint(x); +} +#else +template +inline T Round(const T x) { + return std::nearbyint(x); +} +#endif + +template +T quantize_val(double scale, int64_t zero_point, float value) { + // std::nearbyint results in nearest integer value according to the current + // rounding mode and the default rounding mode is rounds to even in half-way + // cases in most popular processor architectures like x86 and ARM. This is + // typically faster than an alternatives like std::round that rounds half-way + // cases away from zero, and can be consistent with SIMD implementations for + // example in x86 using _mm512_cvtps_epi32 or mm512_round_ps with + // _MM_FROUND_CUR_DIRECTION option that also follow the current rounding mode. 
+ int64_t qvalue; + constexpr int64_t qmin = std::numeric_limits::min(); + constexpr int64_t qmax = std::numeric_limits::max(); + float inv_scale = 1.0f / static_cast(scale); + qvalue = static_cast(zero_point + Round(value * inv_scale)); + qvalue = std::max(qvalue, qmin); + qvalue = std::min(qvalue, qmax); + return static_cast(qvalue); +} + +template +float dequantize_val(double scale, int64_t zero_point, T value) { + // We need to convert the qint8 value to float to ensure the subtraction + // subexpression returns a float + return (static_cast(value.val_) - zero_point) * scale; +} +#endif // USE_FBGEMM +// END copy-pasted code from pytorch core + template void qroi_align_forward_kernel_impl( int n_rois, @@ -46,19 +129,19 @@ void qroi_align_forward_kernel_impl( // Do not using rounding; this implementation detail is critical float offset = aligned ? 0.5 : 0.; float roi_start_w = - at::native::dequantize_val(rois_scale, rois_zp, offset_rois[1]) * + dequantize_val(rois_scale, rois_zp, offset_rois[1]) * spatial_scale - offset; float roi_start_h = - at::native::dequantize_val(rois_scale, rois_zp, offset_rois[2]) * + dequantize_val(rois_scale, rois_zp, offset_rois[2]) * spatial_scale - offset; float roi_end_w = - at::native::dequantize_val(rois_scale, rois_zp, offset_rois[3]) * + dequantize_val(rois_scale, rois_zp, offset_rois[3]) * spatial_scale - offset; float roi_end_h = - at::native::dequantize_val(rois_scale, rois_zp, offset_rois[4]) * + dequantize_val(rois_scale, rois_zp, offset_rois[4]) * spatial_scale - offset; @@ -134,8 +217,7 @@ void qroi_align_forward_kernel_impl( output_val /= count; // Average pooling - output[index] = - at::native::quantize_val(input_scale, input_zp, output_val); + output[index] = quantize_val(input_scale, input_zp, output_val); } // for pw } // for ph } // for c From 7a0184aa2dc8cb0ec2fd4f7cfe79dfb3717ee302 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 26 Aug 2021 09:47:13 +0100 Subject: [PATCH 07/12] [FBcode->GH] Moving logging on base class. 
(#4319) --- torchvision/datasets/cifar.py | 1 - torchvision/datasets/mnist.py | 1 - torchvision/datasets/vision.py | 1 + 3 files changed, 1 insertion(+), 2 deletions(-) diff --git a/torchvision/datasets/cifar.py b/torchvision/datasets/cifar.py index 47b2bd41fb0..17a2b5ee9cd 100644 --- a/torchvision/datasets/cifar.py +++ b/torchvision/datasets/cifar.py @@ -59,7 +59,6 @@ def __init__( super(CIFAR10, self).__init__(root, transform=transform, target_transform=target_transform) - torch._C._log_api_usage_once(f"torchvision.datasets.{self.__class__.__name__}") self.train = train # training set or test set diff --git a/torchvision/datasets/mnist.py b/torchvision/datasets/mnist.py index 0467854e9b3..237a135722f 100644 --- a/torchvision/datasets/mnist.py +++ b/torchvision/datasets/mnist.py @@ -77,7 +77,6 @@ def __init__( ) -> None: super(MNIST, self).__init__(root, transform=transform, target_transform=target_transform) - torch._C._log_api_usage_once(f"torchvision.datasets.{self.__class__.__name__}") self.train = train # training set or test set if self._check_legacy_exist(): diff --git a/torchvision/datasets/vision.py b/torchvision/datasets/vision.py index 2cc9ce14cb1..db44a8b1ba0 100644 --- a/torchvision/datasets/vision.py +++ b/torchvision/datasets/vision.py @@ -31,6 +31,7 @@ def __init__( transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, ) -> None: + torch._C._log_api_usage_once(f"torchvision.datasets.{self.__class__.__name__}") if isinstance(root, torch._six.string_classes): root = os.path.expanduser(root) self.root = root From d004d7798e7874179e9be5671244ca8b3f2760ec Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 26 Aug 2021 10:38:45 +0100 Subject: [PATCH 08/12] Fix broken clang format test. (#4320) --- .../csrc/ops/quantized/cpu/qroi_align_kernel.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/torchvision/csrc/ops/quantized/cpu/qroi_align_kernel.cpp b/torchvision/csrc/ops/quantized/cpu/qroi_align_kernel.cpp index 20d4f503501..15f468b31e7 100644 --- a/torchvision/csrc/ops/quantized/cpu/qroi_align_kernel.cpp +++ b/torchvision/csrc/ops/quantized/cpu/qroi_align_kernel.cpp @@ -129,20 +129,16 @@ void qroi_align_forward_kernel_impl( // Do not using rounding; this implementation detail is critical float offset = aligned ? 0.5 : 0.; float roi_start_w = - dequantize_val(rois_scale, rois_zp, offset_rois[1]) * - spatial_scale - + dequantize_val(rois_scale, rois_zp, offset_rois[1]) * spatial_scale - offset; float roi_start_h = - dequantize_val(rois_scale, rois_zp, offset_rois[2]) * - spatial_scale - + dequantize_val(rois_scale, rois_zp, offset_rois[2]) * spatial_scale - offset; float roi_end_w = - dequantize_val(rois_scale, rois_zp, offset_rois[3]) * - spatial_scale - + dequantize_val(rois_scale, rois_zp, offset_rois[3]) * spatial_scale - offset; float roi_end_h = - dequantize_val(rois_scale, rois_zp, offset_rois[4]) * - spatial_scale - + dequantize_val(rois_scale, rois_zp, offset_rois[4]) * spatial_scale - offset; float roi_width = roi_end_w - roi_start_w; From 37a9ee5b3aead821dc1f795ec9274ccbeea695bb Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 26 Aug 2021 11:03:37 +0100 Subject: [PATCH 09/12] Add EfficientNet Architecture in TorchVision (#4293) * Adding code skeleton * Adding MBConvConfig. * Extend SqueezeExcitation to support custom min_value and activation. * Implement MBConv. * Replace stochastic_depth with operator. 
* Adding the rest of the EfficientNet implementation * Update torchvision/models/efficientnet.py * Replacing 1st activation of SE with SiLU. * Adding efficientnet_b3. * Replace mobilenetv3 assets with custom. * Switch to standard sigmoid and reconfiguring BN. * Reconfiguration of efficientnet. * Add repr * Add weights. * Update weights. * Adding B5-B7 weights. * Update docs and hubconf. * Fix doc link. * Fix typo on comment. --- docs/source/models.rst | 43 +- hubconf.py | 2 + references/classification/README.md | 6 + references/classification/presets.py | 6 +- references/classification/train.py | 17 +- ...odelTester.test_efficientnet_b0_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b1_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b2_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b3_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b4_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b5_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b6_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b7_expect.pkl | Bin 0 -> 939 bytes torchvision/models/__init__.py | 1 + torchvision/models/efficientnet.py | 369 ++++++++++++++++++ torchvision/ops/stochastic_depth.py | 4 +- 16 files changed, 441 insertions(+), 7 deletions(-) create mode 100644 test/expect/ModelTester.test_efficientnet_b0_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b1_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b2_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b3_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b4_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b5_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b6_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b7_expect.pkl create mode 100644 torchvision/models/efficientnet.py diff --git a/docs/source/models.rst b/docs/source/models.rst index b9bff7a36e8..64ca69f47ae 100644 --- a/docs/source/models.rst +++ b/docs/source/models.rst @@ -27,6 +27,7 @@ architectures for image classification: - `ResNeXt`_ - `Wide ResNet`_ - `MNASNet`_ +- `EfficientNet`_ You can construct a model with random weights by calling its constructor: @@ -47,6 +48,14 @@ You can construct a model with random weights by calling its constructor: resnext50_32x4d = models.resnext50_32x4d() wide_resnet50_2 = models.wide_resnet50_2() mnasnet = models.mnasnet1_0() + efficientnet_b0 = models.efficientnet_b0() + efficientnet_b1 = models.efficientnet_b1() + efficientnet_b2 = models.efficientnet_b2() + efficientnet_b3 = models.efficientnet_b3() + efficientnet_b4 = models.efficientnet_b4() + efficientnet_b5 = models.efficientnet_b5() + efficientnet_b6 = models.efficientnet_b6() + efficientnet_b7 = models.efficientnet_b7() We provide pre-trained models, using the PyTorch :mod:`torch.utils.model_zoo`. 
These can be constructed by passing ``pretrained=True``: @@ -68,6 +77,14 @@ These can be constructed by passing ``pretrained=True``: resnext50_32x4d = models.resnext50_32x4d(pretrained=True) wide_resnet50_2 = models.wide_resnet50_2(pretrained=True) mnasnet = models.mnasnet1_0(pretrained=True) + efficientnet_b0 = models.efficientnet_b0(pretrained=True) + efficientnet_b1 = models.efficientnet_b1(pretrained=True) + efficientnet_b2 = models.efficientnet_b2(pretrained=True) + efficientnet_b3 = models.efficientnet_b3(pretrained=True) + efficientnet_b4 = models.efficientnet_b4(pretrained=True) + efficientnet_b5 = models.efficientnet_b5(pretrained=True) + efficientnet_b6 = models.efficientnet_b6(pretrained=True) + efficientnet_b7 = models.efficientnet_b7(pretrained=True) Instancing a pre-trained model will download its weights to a cache directory. This directory can be set using the `TORCH_MODEL_ZOO` environment variable. See @@ -113,7 +130,10 @@ Unfortunately, the concrete `subset` that was used is lost. For more information see `this discussion `_ or `these experiments `_. -ImageNet 1-crop error rates (224x224) +The sizes of the EfficientNet models depend on the variant. For the exact input sizes +`check here `_ + +ImageNet 1-crop error rates ================================ ============= ============= Model Acc@1 Acc@5 @@ -151,6 +171,14 @@ Wide ResNet-50-2 78.468 94.086 Wide ResNet-101-2 78.848 94.284 MNASNet 1.0 73.456 91.510 MNASNet 0.5 67.734 87.490 +EfficientNet-B0 77.692 93.532 +EfficientNet-B1 78.642 94.186 +EfficientNet-B2 80.608 95.310 +EfficientNet-B3 82.008 96.054 +EfficientNet-B4 83.384 96.594 +EfficientNet-B5 83.444 96.628 +EfficientNet-B6 84.008 96.916 +EfficientNet-B7 84.122 96.908 ================================ ============= ============= @@ -166,6 +194,7 @@ MNASNet 0.5 67.734 87.490 .. _MobileNetV3: https://arxiv.org/abs/1905.02244 .. _ResNeXt: https://arxiv.org/abs/1611.05431 .. _MNASNet: https://arxiv.org/abs/1807.11626 +.. _EfficientNet: https://arxiv.org/abs/1905.11946 .. currentmodule:: torchvision.models @@ -267,6 +296,18 @@ MNASNet .. autofunction:: mnasnet1_0 .. autofunction:: mnasnet1_3 +EfficientNet +------------ + +.. autofunction:: efficientnet_b0 +.. autofunction:: efficientnet_b1 +.. autofunction:: efficientnet_b2 +.. autofunction:: efficientnet_b3 +.. autofunction:: efficientnet_b4 +.. autofunction:: efficientnet_b5 +.. autofunction:: efficientnet_b6 +.. autofunction:: efficientnet_b7 + Quantized Models ---------------- diff --git a/hubconf.py b/hubconf.py index 097759bdd89..2bff6850525 100644 --- a/hubconf.py +++ b/hubconf.py @@ -15,6 +15,8 @@ from torchvision.models.mobilenetv3 import mobilenet_v3_large, mobilenet_v3_small from torchvision.models.mnasnet import mnasnet0_5, mnasnet0_75, mnasnet1_0, \ mnasnet1_3 +from torchvision.models.efficientnet import efficientnet_b0, efficientnet_b1, efficientnet_b2, \ + efficientnet_b3, efficientnet_b4, efficientnet_b5, efficientnet_b6, efficientnet_b7 # segmentation from torchvision.models.segmentation import fcn_resnet50, fcn_resnet101, \ diff --git a/references/classification/README.md b/references/classification/README.md index e0b7f210175..210a63c0bca 100644 --- a/references/classification/README.md +++ b/references/classification/README.md @@ -68,6 +68,12 @@ Then we averaged the parameters of the last 3 checkpoints that improved the Acc@ and [#3354](https://github.com/pytorch/vision/pull/3354) for details. 
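The checkpoint averaging mentioned above is an element-wise mean over the saved weights. A minimal sketch, assuming each checkpoint file holds a plain model state_dict (the file names below are hypothetical):

```python
import torch

# Hypothetical paths to the last 3 checkpoints that improved Acc@1.
checkpoint_paths = ["model_58.pth", "model_61.pth", "model_63.pth"]
state_dicts = [torch.load(path, map_location="cpu") for path in checkpoint_paths]

averaged = {}
for key, value in state_dicts[0].items():
    if value.is_floating_point():
        # Element-wise mean of the parameter/buffer across checkpoints.
        averaged[key] = torch.stack([sd[key] for sd in state_dicts]).mean(dim=0)
    else:
        # Integer buffers (e.g. BatchNorm's num_batches_tracked) are copied as-is.
        averaged[key] = value.clone()

torch.save(averaged, "mobilenet_v3_large_averaged.pth")
```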
+### EfficientNet + +The weights of the B0-B4 variants are ported from Ross Wightman's [timm repo](https://github.com/rwightman/pytorch-image-models/blob/01cb46a9a50e3ba4be167965b5764e9702f09b30/timm/models/efficientnet.py#L95-L108). + +The weights of the B5-B7 variants are ported from Luke Melas' [EfficientNet-PyTorch repo](https://github.com/lukemelas/EfficientNet-PyTorch/blob/1039e009545d9329ea026c9f7541341439712b96/efficientnet_pytorch/utils.py#L562-L564). + ## Mixed precision training Automatic Mixed Precision (AMP) training on GPU for Pytorch can be enabled with the [NVIDIA Apex extension](https://github.com/NVIDIA/apex). diff --git a/references/classification/presets.py b/references/classification/presets.py index 6bb389ba8db..ce5a6fe414f 100644 --- a/references/classification/presets.py +++ b/references/classification/presets.py @@ -1,4 +1,5 @@ from torchvision.transforms import autoaugment, transforms +from torchvision.transforms.functional import InterpolationMode class ClassificationPresetTrain: @@ -24,10 +25,11 @@ def __call__(self, img): class ClassificationPresetEval: - def __init__(self, crop_size, resize_size=256, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)): + def __init__(self, crop_size, resize_size=256, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), + interpolation=InterpolationMode.BILINEAR): self.transforms = transforms.Compose([ - transforms.Resize(resize_size), + transforms.Resize(resize_size, interpolation=interpolation), transforms.CenterCrop(crop_size), transforms.ToTensor(), transforms.Normalize(mean=mean, std=std), diff --git a/references/classification/train.py b/references/classification/train.py index b4e9d274662..9ba99b3dc54 100644 --- a/references/classification/train.py +++ b/references/classification/train.py @@ -6,6 +6,7 @@ import torch.utils.data from torch import nn import torchvision +from torchvision.transforms.functional import InterpolationMode import presets import utils @@ -82,7 +83,18 @@ def _get_cache_path(filepath): def load_data(traindir, valdir, args): # Data loading code print("Loading data") - resize_size, crop_size = (342, 299) if args.model == 'inception_v3' else (256, 224) + resize_size, crop_size = 256, 224 + interpolation = InterpolationMode.BILINEAR + if args.model == 'inception_v3': + resize_size, crop_size = 342, 299 + elif args.model.startswith('efficientnet_'): + sizes = { + 'b0': (256, 224), 'b1': (256, 240), 'b2': (288, 288), 'b3': (320, 300), + 'b4': (384, 380), 'b5': (456, 456), 'b6': (528, 528), 'b7': (600, 600), + } + e_type = args.model.replace('efficientnet_', '') + resize_size, crop_size = sizes[e_type] + interpolation = InterpolationMode.BICUBIC print("Loading training data") st = time.time() @@ -113,7 +125,8 @@ def load_data(traindir, valdir, args): else: dataset_test = torchvision.datasets.ImageFolder( valdir, - presets.ClassificationPresetEval(crop_size=crop_size, resize_size=resize_size)) + presets.ClassificationPresetEval(crop_size=crop_size, resize_size=resize_size, + interpolation=interpolation)) if args.cache_dataset: print("Saving dataset_test to {}".format(cache_path)) utils.mkdir(os.path.dirname(cache_path)) diff --git a/test/expect/ModelTester.test_efficientnet_b0_expect.pkl b/test/expect/ModelTester.test_efficientnet_b0_expect.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1de871ce0fbea9ddbab7e315b05f864bc5f6fa53 GIT binary patch literal 939 zcmWIWW@cev;NW1u00Im`42ea_8JT6N`YDMeiFyUuIc`pT3{fbcfhoBpAE-(%zO*DW zr zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L 
z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w z{zUOK5-018ua*Pfs^#xPFi_; zv9K0Hr_XYs!)hxHiiB76xiM&!7+hHqaUyty&u#rxmqYZm9FCn>e!TV0N{8tctD22Y zto-_HtET<$?;8KNtkLvZ8@#Hby-)M{2L`S7kJnb}?onTL4 z(nWmJ3Z2()R)9jw@aV*QCx9UZ!ni|=pTQa)T4kw4#lTo_b229~xR62)!ZhXr*?e(c zdMFdnRuB&GW&~02G>IIC0w4(#fSy9pbtC(U4@KuIAP-r$z5%*kWLNQ{=#>Dv5T+Lz z1_9n|Y&uXya?HAL<)Fk20x)_zgv&4q>`9P!*+6-N!4s+glnDa7S=m5h%s>cI4^ayM DfM5D$ literal 0 HcmV?d00001 diff --git a/test/expect/ModelTester.test_efficientnet_b1_expect.pkl b/test/expect/ModelTester.test_efficientnet_b1_expect.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1499a97028eef11527829f13476152d00d8cb90b GIT binary patch literal 939 zcmWIWW@cev;NW1u00Im`42ea_8JT6N`YDMeiFyUuIc`pT3{fbcfhoBpAE-(%zO*DW zr zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w z{zUOK5~t%{$I3Y`E@{3?+`dBT8o%a@g$Aqq3Jo-$FwI0(K3b>rdNr2iX=piAm1#2Z zO;|bi!lIQ1zr8e9xlCI1lOs**LQgMRB!L3ZQz*J_WIyqt=zIm_A?wyRK-Y`xDt;8b5 zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w z{zUOK66alKk>+9fY|Tr8yERh|pIVuo7OWY`RI%#8jfLuN4~jJfR_AL9?)tFuz?+TFl9&C0F@kMb};5l`hRjTFp}$S6(SPu~KL9%oRO1ertT=vDW&>x@zS- zMK-O2#~*8U{L$805UjpjcEj!!cM1+_?7Q@9rNm2RtqNw|RX6lIRtn1XuH3Qi#R`Y1 zdo^FR-d-8P>!NXi?fc3_-Qrq2Yx^~L>v>j9KiRdCF_%NjC8b?6d+Prc;q5Y7Hp`f_ z!l!nv(0wPN1q!XA=X2do07D9dafcQ^gEc&~%2JDpfwAD`WKLvoA%z@-Y0L$(`Qp6v zP$r zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w z{zUOK5-0rg$`xua*)#*ASv8-Wj$84~;r;v-qC8VI zf4EOtIawlHlOa8Dg>ql_$|F*b)x+)_Tj@}_QMGFFL`^>tDa~m*3M*Nj`L6u6KWZh1 z1FyQvf>|rGo`q`Op;eYzR1Ay-Hz#ul>i!MRpZGie3qz3t@Vp zVG!WW#-;;RB*&}^R}M zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w z{zUOK633uVe8mUTA*RQ;o@?ggxAaD)Qw!yl z2j1LJx9*Zz@kNS9qr2zmiZ!=iEN8jVptiPRnZ|@YTQqDQhHA`;3fE9kTCAbdE3m># z@uCL9zWvHBdOJ0^vQ$@24Sb<-|B=ATcia)n?ZXn5H>~ARx4t%U1sDIN70%z6t*}tL zqp|4nD)koT^c8z2{`hR zH_^`J&(bwCK%pi0=k$~lz>orA+@ZzKU=0tgvecqtU@W*fnG+dYNFfJd8gqeczBn&E zlnH1n2nTpGf+%>JM2 zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w z{zUOK66d#r>56sJ&S*R+oxft`o>?nReQ#-K#dT;*IU=KeY{@2#7pI#ww64TzG)1bf z^pM}Dp>rTo!^2rr^H|I_js5X&HN4ELG<4DfS6GT4*9g)-t)%}h80Eu$Cvwv zwP|R~ch&qqalQsygNw$WmjW7mO*hrE-gK^vdT?#U-;c935{g?_$af`Z*ce|~5%k$r z-LCAE#w_*^E9zhAYGi2osO8;`(>!x>-txTXZki38Jj*|prmDYLb!Yj>y>m2nttwq{ z+;zLgtLNvJgF;Iuqqg(}Fr+{jcWCi5Si?iBEVZZ@7z=Jr=0pY;QpiD=##|tqFV0I3 zWdhm?!U5ilAPSx)k>gMRB!L3ZQz*J_WIyqt=zIm_A?wyRK-Y`xDt;8b5 zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w z{zUOK5=Y_3bG6o+A}hY|ny%RLJyzo%i<|~m{wj?#m%l8%aDD%ZY4XbID^Cfom=ir= z#j2_Q)DQGjFWcTCydp%~Tw~AIIQ6={tr{YV50}j|x}x!JN9Br)Fq0Kk-@DZ_FD_qR zah`w0AFaL>3_2fFzj4|wf04Lo>8fuR)Z64HtMja!w0wc?k!7u+5$ZApEz93eb6>u4 z?ph7Y-)S0OANDOTtYy@Y$kbVWHT;@-z=^`;NzZ%K`3@ghv7#?iL+^R^a*vgB)fNcE zsa|$opwV#n#u89yHO8NHI{^$S5XK!^{0!Fc&?-wUDh9@ao0B<_!G#oZ5T-E~$mWal z(nFbmwt{egHzSCGr%B{E6aYz}0Q3}!t{d4;d?-3!0eQ%}^$pPVBD;zoMXv zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w z{zUOK66f2#>&qwI;$Lpnv`Ia40_zH`Wabr~)w5T$-CTQ5p>vS#q@6>8VfGesIDp9ygcE?UX46s|K;Jf*2^n4GcJ3PsHFa-`t~xB zIg;uTPBm&(3TM=K!UWXW&7Ll^^4zVyW&ieNr#a6oUo_Qkg=OWh Tensor: + scale = F.adaptive_avg_pool2d(input, 1) + scale = self.fc1(scale) + scale = F.silu(scale, inplace=True) + scale = self.fc2(scale) + return 
scale.sigmoid() + + def forward(self, input: Tensor) -> Tensor: + scale = self._scale(input) + return scale * input + + +class MBConvConfig: + # Stores information listed at Table 1 of the EfficientNet paper + def __init__(self, + expand_ratio: float, kernel: int, stride: int, + input_channels: int, out_channels: int, num_layers: int, + width_mult: float, depth_mult: float) -> None: + self.expand_ratio = expand_ratio + self.kernel = kernel + self.stride = stride + self.input_channels = self.adjust_channels(input_channels, width_mult) + self.out_channels = self.adjust_channels(out_channels, width_mult) + self.num_layers = self.adjust_depth(num_layers, depth_mult) + + def __repr__(self) -> str: + s = self.__class__.__name__ + '(' + s += 'expand_ratio={expand_ratio}' + s += ', kernel={kernel}' + s += ', stride={stride}' + s += ', input_channels={input_channels}' + s += ', out_channels={out_channels}' + s += ', num_layers={num_layers}' + s += ')' + return s.format(**self.__dict__) + + @staticmethod + def adjust_channels(channels: int, width_mult: float, min_value: Optional[int] = None) -> int: + return _make_divisible(channels * width_mult, 8, min_value) + + @staticmethod + def adjust_depth(num_layers: int, depth_mult: float): + return int(math.ceil(num_layers * depth_mult)) + + +class MBConv(nn.Module): + def __init__(self, cnf: MBConvConfig, stochastic_depth_prob: float, norm_layer: Callable[..., nn.Module], + se_layer: Callable[..., nn.Module] = SqueezeExcitation) -> None: + super().__init__() + + if not (1 <= cnf.stride <= 2): + raise ValueError('illegal stride value') + + self.use_res_connect = cnf.stride == 1 and cnf.input_channels == cnf.out_channels + + layers: List[nn.Module] = [] + activation_layer = nn.SiLU + + # expand + expanded_channels = cnf.adjust_channels(cnf.input_channels, cnf.expand_ratio) + if expanded_channels != cnf.input_channels: + layers.append(ConvBNActivation(cnf.input_channels, expanded_channels, kernel_size=1, + norm_layer=norm_layer, activation_layer=activation_layer)) + + # depthwise + layers.append(ConvBNActivation(expanded_channels, expanded_channels, kernel_size=cnf.kernel, + stride=cnf.stride, groups=expanded_channels, + norm_layer=norm_layer, activation_layer=activation_layer)) + + # squeeze and excitation + squeeze_channels = max(1, cnf.input_channels // 4) + layers.append(se_layer(expanded_channels, squeeze_channels)) + + # project + layers.append(ConvBNActivation(expanded_channels, cnf.out_channels, kernel_size=1, norm_layer=norm_layer, + activation_layer=nn.Identity)) + + self.block = nn.Sequential(*layers) + self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row") + self.out_channels = cnf.out_channels + + def forward(self, input: Tensor) -> Tensor: + result = self.block(input) + if self.use_res_connect: + result = self.stochastic_depth(result) + result += input + return result + + +class EfficientNet(nn.Module): + def __init__( + self, + inverted_residual_setting: List[MBConvConfig], + dropout: float, + stochastic_depth_prob: float = 0.2, + num_classes: int = 1000, + block: Optional[Callable[..., nn.Module]] = None, + norm_layer: Optional[Callable[..., nn.Module]] = None, + **kwargs: Any + ) -> None: + """ + EfficientNet main class + + Args: + inverted_residual_setting (List[MBConvConfig]): Network structure + dropout (float): The droupout probability + stochastic_depth_prob (float): The stochastic depth probability + num_classes (int): Number of classes + block (Optional[Callable[..., nn.Module]]): Module specifying inverted residual 
building block for mobilenet + norm_layer (Optional[Callable[..., nn.Module]]): Module specifying the normalization layer to use + """ + super().__init__() + + if not inverted_residual_setting: + raise ValueError("The inverted_residual_setting should not be empty") + elif not (isinstance(inverted_residual_setting, Sequence) and + all([isinstance(s, MBConvConfig) for s in inverted_residual_setting])): + raise TypeError("The inverted_residual_setting should be List[MBConvConfig]") + + if block is None: + block = MBConv + + if norm_layer is None: + norm_layer = nn.BatchNorm2d + + layers: List[nn.Module] = [] + + # building first layer + firstconv_output_channels = inverted_residual_setting[0].input_channels + layers.append(ConvBNActivation(3, firstconv_output_channels, kernel_size=3, stride=2, norm_layer=norm_layer, + activation_layer=nn.SiLU)) + + # building inverted residual blocks + total_stage_blocks = sum([cnf.num_layers for cnf in inverted_residual_setting]) + stage_block_id = 0 + for cnf in inverted_residual_setting: + stage: List[nn.Module] = [] + for _ in range(cnf.num_layers): + # copy to avoid modifications. shallow copy is enough + block_cnf = copy.copy(cnf) + + # overwrite info if not the first conv in the stage + if stage: + block_cnf.input_channels = block_cnf.out_channels + block_cnf.stride = 1 + + # adjust stochastic depth probability based on the depth of the stage block + sd_prob = stochastic_depth_prob * float(stage_block_id) / total_stage_blocks + + stage.append(block(block_cnf, sd_prob, norm_layer)) + stage_block_id += 1 + + layers.append(nn.Sequential(*stage)) + + # building last several layers + lastconv_input_channels = inverted_residual_setting[-1].out_channels + lastconv_output_channels = 4 * lastconv_input_channels + layers.append(ConvBNActivation(lastconv_input_channels, lastconv_output_channels, kernel_size=1, + norm_layer=norm_layer, activation_layer=nn.SiLU)) + + self.features = nn.Sequential(*layers) + self.avgpool = nn.AdaptiveAvgPool2d(1) + self.classifier = nn.Sequential( + nn.Dropout(p=dropout, inplace=True), + nn.Linear(lastconv_output_channels, num_classes), + ) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + init_range = 1.0 / math.sqrt(m.out_features) + nn.init.uniform_(m.weight, -init_range, init_range) + nn.init.zeros_(m.bias) + + def _forward_impl(self, x: Tensor) -> Tensor: + x = self.features(x) + + x = self.avgpool(x) + x = torch.flatten(x, 1) + + x = self.classifier(x) + + return x + + def forward(self, x: Tensor) -> Tensor: + return self._forward_impl(x) + + +def _efficientnet_conf(width_mult: float, depth_mult: float, **kwargs: Any) -> List[MBConvConfig]: + bneck_conf = partial(MBConvConfig, width_mult=width_mult, depth_mult=depth_mult) + inverted_residual_setting = [ + bneck_conf(1, 3, 1, 32, 16, 1), + bneck_conf(6, 3, 2, 16, 24, 2), + bneck_conf(6, 5, 2, 24, 40, 2), + bneck_conf(6, 3, 2, 40, 80, 3), + bneck_conf(6, 5, 1, 80, 112, 3), + bneck_conf(6, 5, 2, 112, 192, 4), + bneck_conf(6, 3, 1, 192, 320, 1), + ] + return inverted_residual_setting + + +def _efficientnet_model( + arch: str, + inverted_residual_setting: List[MBConvConfig], + dropout: float, + pretrained: bool, + progress: bool, + **kwargs: Any +) -> EfficientNet: + model = EfficientNet(inverted_residual_setting, dropout, **kwargs) + 
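For a rough sense of what the width/depth multipliers above do to the base configuration, a self-contained sketch follows; it assumes the usual MobileNet-style channel rounding (the real code delegates to the shared `_make_divisible` helper, so exact values may differ slightly):

```python
import math

def make_divisible(v: float, divisor: int = 8) -> int:
    # Assumed rounding rule: snap to the nearest multiple of `divisor`,
    # but never drop below 90% of the original value.
    new_v = max(divisor, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

def adjust_channels(channels: int, width_mult: float) -> int:
    return make_divisible(channels * width_mult)

def adjust_depth(num_layers: int, depth_mult: float) -> int:
    return int(math.ceil(num_layers * depth_mult))

# efficientnet_b3 is built with width_mult=1.2, depth_mult=1.4 (see the builders below).
for in_c, out_c, layers in [(32, 16, 1), (16, 24, 2), (24, 40, 2)]:
    print(adjust_channels(in_c, 1.2), adjust_channels(out_c, 1.2), adjust_depth(layers, 1.4))
# 40 24 2
# 24 32 3
# 32 48 3
```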
if pretrained: + if model_urls.get(arch, None) is None: + raise ValueError("No checkpoint is available for model type {}".format(arch)) + state_dict = load_state_dict_from_url(model_urls[arch], progress=progress) + model.load_state_dict(state_dict) + return model + + +def efficientnet_b0(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: + """ + Constructs a EfficientNet B0 architecture from + `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + inverted_residual_setting = _efficientnet_conf(width_mult=1.0, depth_mult=1.0, **kwargs) + return _efficientnet_model("efficientnet_b0", inverted_residual_setting, 0.2, pretrained, progress, **kwargs) + + +def efficientnet_b1(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: + """ + Constructs a EfficientNet B1 architecture from + `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + inverted_residual_setting = _efficientnet_conf(width_mult=1.0, depth_mult=1.1, **kwargs) + return _efficientnet_model("efficientnet_b1", inverted_residual_setting, 0.2, pretrained, progress, **kwargs) + + +def efficientnet_b2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: + """ + Constructs a EfficientNet B2 architecture from + `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + inverted_residual_setting = _efficientnet_conf(width_mult=1.1, depth_mult=1.2, **kwargs) + return _efficientnet_model("efficientnet_b2", inverted_residual_setting, 0.3, pretrained, progress, **kwargs) + + +def efficientnet_b3(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: + """ + Constructs a EfficientNet B3 architecture from + `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + inverted_residual_setting = _efficientnet_conf(width_mult=1.2, depth_mult=1.4, **kwargs) + return _efficientnet_model("efficientnet_b3", inverted_residual_setting, 0.3, pretrained, progress, **kwargs) + + +def efficientnet_b4(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: + """ + Constructs a EfficientNet B4 architecture from + `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" `_. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + inverted_residual_setting = _efficientnet_conf(width_mult=1.4, depth_mult=1.8, **kwargs) + return _efficientnet_model("efficientnet_b4", inverted_residual_setting, 0.4, pretrained, progress, **kwargs) + + +def efficientnet_b5(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: + """ + Constructs a EfficientNet B5 architecture from + `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + inverted_residual_setting = _efficientnet_conf(width_mult=1.6, depth_mult=2.2, **kwargs) + return _efficientnet_model("efficientnet_b5", inverted_residual_setting, 0.4, pretrained, progress, + norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), **kwargs) + + +def efficientnet_b6(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: + """ + Constructs a EfficientNet B6 architecture from + `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + inverted_residual_setting = _efficientnet_conf(width_mult=1.8, depth_mult=2.6, **kwargs) + return _efficientnet_model("efficientnet_b6", inverted_residual_setting, 0.5, pretrained, progress, + norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), **kwargs) + + +def efficientnet_b7(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: + """ + Constructs a EfficientNet B7 architecture from + `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + inverted_residual_setting = _efficientnet_conf(width_mult=2.0, depth_mult=3.1, **kwargs) + return _efficientnet_model("efficientnet_b7", inverted_residual_setting, 0.5, pretrained, progress, + norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), **kwargs) diff --git a/torchvision/ops/stochastic_depth.py b/torchvision/ops/stochastic_depth.py index f3338242a76..0b95e7cca67 100644 --- a/torchvision/ops/stochastic_depth.py +++ b/torchvision/ops/stochastic_depth.py @@ -22,12 +22,12 @@ def stochastic_depth(input: Tensor, p: float, mode: str, training: bool = True) """ if p < 0.0 or p > 1.0: raise ValueError("drop probability has to be between 0 and 1, but got {}".format(p)) + if mode not in ["batch", "row"]: + raise ValueError("mode has to be either 'batch' or 'row', but got {}".format(mode)) if not training or p == 0.0: return input survival_rate = 1.0 - p - if mode not in ["batch", "row"]: - raise ValueError("mode has to be either 'batch' or 'row', but got {}".format(mode)) size = [1] * input.ndim if mode == "row": size[0] = input.shape[0] From 96f6e0a117d5c56f7e0237851dbb96144ebb110b Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 26 Aug 2021 13:58:09 +0100 Subject: [PATCH 10/12] Make get_image_size and get_image_num_channels public. 
(#4321) --- references/detection/transforms.py | 8 ++--- test/test_functional_tensor.py | 20 ++++++++++- torchvision/transforms/autoaugment.py | 6 ++-- torchvision/transforms/functional.py | 38 ++++++++++++++------- torchvision/transforms/functional_pil.py | 6 ++-- torchvision/transforms/functional_tensor.py | 14 ++++---- torchvision/transforms/transforms.py | 18 +++++----- 7 files changed, 70 insertions(+), 40 deletions(-) diff --git a/references/detection/transforms.py b/references/detection/transforms.py index 8e4b8870eaf..2d50adb1295 100644 --- a/references/detection/transforms.py +++ b/references/detection/transforms.py @@ -33,7 +33,7 @@ def forward(self, image: Tensor, if torch.rand(1) < self.p: image = F.hflip(image) if target is not None: - width, _ = F._get_image_size(image) + width, _ = F.get_image_size(image) target["boxes"][:, [0, 2]] = width - target["boxes"][:, [2, 0]] if "masks" in target: target["masks"] = target["masks"].flip(-1) @@ -76,7 +76,7 @@ def forward(self, image: Tensor, elif image.ndimension() == 2: image = image.unsqueeze(0) - orig_w, orig_h = F._get_image_size(image) + orig_w, orig_h = F.get_image_size(image) while True: # sample an option @@ -157,7 +157,7 @@ def forward(self, image: Tensor, if torch.rand(1) < self.p: return image, target - orig_w, orig_h = F._get_image_size(image) + orig_w, orig_h = F.get_image_size(image) r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0]) canvas_width = int(orig_w * r) @@ -226,7 +226,7 @@ def forward(self, image: Tensor, image = self._contrast(image) if r[6] < self.p: - channels = F._get_image_num_channels(image) + channels = F.get_image_num_channels(image) permutation = torch.randperm(channels) is_pil = F._is_pil_image(image) diff --git a/test/test_functional_tensor.py b/test/test_functional_tensor.py index 30ee144888c..717e2a7cb33 100644 --- a/test/test_functional_tensor.py +++ b/test/test_functional_tensor.py @@ -31,6 +31,24 @@ NEAREST, BILINEAR, BICUBIC = InterpolationMode.NEAREST, InterpolationMode.BILINEAR, InterpolationMode.BICUBIC +@pytest.mark.parametrize('device', cpu_and_gpu()) +@pytest.mark.parametrize('fn', [F.get_image_size, F.get_image_num_channels]) +def test_image_sizes(device, fn): + script_F = torch.jit.script(fn) + + img_tensor, pil_img = _create_data(16, 18, 3, device=device) + value_img = fn(img_tensor) + value_pil_img = fn(pil_img) + assert value_img == value_pil_img + + value_img_script = script_F(img_tensor) + assert value_img == value_img_script + + batch_tensors = _create_data_batch(16, 18, 3, num_samples=4, device=device) + value_img_batch = fn(batch_tensors) + assert value_img == value_img_batch + + @needs_cuda def test_scale_channel(): """Make sure that _scale_channel gives the same results on CPU and GPU as @@ -908,7 +926,7 @@ def test_resized_crop(device, mode): @pytest.mark.parametrize('device', cpu_and_gpu()) @pytest.mark.parametrize('func, args', [ - (F_t._get_image_size, ()), (F_t.vflip, ()), + (F_t.get_image_size, ()), (F_t.vflip, ()), (F_t.hflip, ()), (F_t.crop, (1, 2, 4, 5)), (F_t.adjust_brightness, (0., )), (F_t.adjust_contrast, (1., )), (F_t.adjust_hue, (-0.5, )), (F_t.adjust_saturation, (2., )), diff --git a/torchvision/transforms/autoaugment.py b/torchvision/transforms/autoaugment.py index 3b6c927a4eb..e241b821871 100644 --- a/torchvision/transforms/autoaugment.py +++ b/torchvision/transforms/autoaugment.py @@ -188,7 +188,7 @@ def forward(self, img: Tensor) -> Tensor: fill = self.fill if isinstance(img, Tensor): if isinstance(fill, (int, float)): - 
fill = [float(fill)] * F._get_image_num_channels(img) + fill = [float(fill)] * F.get_image_num_channels(img) elif fill is not None: fill = [float(f) for f in fill] @@ -209,10 +209,10 @@ def forward(self, img: Tensor) -> Tensor: img = F.affine(img, angle=0.0, translate=[0, 0], scale=1.0, shear=[0.0, math.degrees(magnitude)], interpolation=self.interpolation, fill=fill) elif op_name == "TranslateX": - img = F.affine(img, angle=0.0, translate=[int(F._get_image_size(img)[0] * magnitude), 0], scale=1.0, + img = F.affine(img, angle=0.0, translate=[int(F.get_image_size(img)[0] * magnitude), 0], scale=1.0, interpolation=self.interpolation, shear=[0.0, 0.0], fill=fill) elif op_name == "TranslateY": - img = F.affine(img, angle=0.0, translate=[0, int(F._get_image_size(img)[1] * magnitude)], scale=1.0, + img = F.affine(img, angle=0.0, translate=[0, int(F.get_image_size(img)[1] * magnitude)], scale=1.0, interpolation=self.interpolation, shear=[0.0, 0.0], fill=fill) elif op_name == "Rotate": img = F.rotate(img, magnitude, interpolation=self.interpolation, fill=fill) diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index dc3a9f8f68b..679631a971e 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -58,22 +58,34 @@ def _interpolation_modes_from_int(i: int) -> InterpolationMode: _is_pil_image = F_pil._is_pil_image -def _get_image_size(img: Tensor) -> List[int]: - """Returns image size as [w, h] +def get_image_size(img: Tensor) -> List[int]: + """Returns the size of an image as [width, height]. + + Args: + img (PIL Image or Tensor): The image to be checked. + + Returns: + List[int]: The image size. """ if isinstance(img, torch.Tensor): - return F_t._get_image_size(img) + return F_t.get_image_size(img) - return F_pil._get_image_size(img) + return F_pil.get_image_size(img) -def _get_image_num_channels(img: Tensor) -> int: - """Returns number of image channels +def get_image_num_channels(img: Tensor) -> int: + """Returns the number of channels of an image. + + Args: + img (PIL Image or Tensor): The image to be checked. + + Returns: + int: The number of channels. 
""" if isinstance(img, torch.Tensor): - return F_t._get_image_num_channels(img) + return F_t.get_image_num_channels(img) - return F_pil._get_image_num_channels(img) + return F_pil.get_image_num_channels(img) @torch.jit.unused @@ -500,7 +512,7 @@ def center_crop(img: Tensor, output_size: List[int]) -> Tensor: elif isinstance(output_size, (tuple, list)) and len(output_size) == 1: output_size = (output_size[0], output_size[0]) - image_width, image_height = _get_image_size(img) + image_width, image_height = get_image_size(img) crop_height, crop_width = output_size if crop_width > image_width or crop_height > image_height: @@ -511,7 +523,7 @@ def center_crop(img: Tensor, output_size: List[int]) -> Tensor: (crop_height - image_height + 1) // 2 if crop_height > image_height else 0, ] img = pad(img, padding_ltrb, fill=0) # PIL uses fill value 0 - image_width, image_height = _get_image_size(img) + image_width, image_height = get_image_size(img) if crop_width == image_width and crop_height == image_height: return img @@ -696,7 +708,7 @@ def five_crop(img: Tensor, size: List[int]) -> Tuple[Tensor, Tensor, Tensor, Ten if len(size) != 2: raise ValueError("Please provide only two dimensions (h, w) for size.") - image_width, image_height = _get_image_size(img) + image_width, image_height = get_image_size(img) crop_height, crop_width = size if crop_width > image_width or crop_height > image_height: msg = "Requested crop size {} is bigger than input size {}" @@ -993,7 +1005,7 @@ def rotate( center_f = [0.0, 0.0] if center is not None: - img_size = _get_image_size(img) + img_size = get_image_size(img) # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center. center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, img_size)] @@ -1094,7 +1106,7 @@ def affine( if len(shear) != 2: raise ValueError("Shear should be a sequence containing two values. 
Got {}".format(shear)) - img_size = _get_image_size(img) + img_size = get_image_size(img) if not isinstance(img, torch.Tensor): # center = (img_size[0] * 0.5 + 0.5, img_size[1] * 0.5 + 0.5) # it is visually better to estimate the center without 0.5 offset diff --git a/torchvision/transforms/functional_pil.py b/torchvision/transforms/functional_pil.py index 0f3d1d87bb4..67f4ff4bb33 100644 --- a/torchvision/transforms/functional_pil.py +++ b/torchvision/transforms/functional_pil.py @@ -20,14 +20,14 @@ def _is_pil_image(img: Any) -> bool: @torch.jit.unused -def _get_image_size(img: Any) -> List[int]: +def get_image_size(img: Any) -> List[int]: if _is_pil_image(img): - return img.size + return list(img.size) raise TypeError("Unexpected type {}".format(type(img))) @torch.jit.unused -def _get_image_num_channels(img: Any) -> int: +def get_image_num_channels(img: Any) -> int: if _is_pil_image(img): return 1 if img.mode == 'L' else 3 raise TypeError("Unexpected type {}".format(type(img))) diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py index 3e4069bb0c0..61c07433cb6 100644 --- a/torchvision/transforms/functional_tensor.py +++ b/torchvision/transforms/functional_tensor.py @@ -16,13 +16,13 @@ def _assert_image_tensor(img: Tensor) -> None: raise TypeError("Tensor is not a torch image.") -def _get_image_size(img: Tensor) -> List[int]: +def get_image_size(img: Tensor) -> List[int]: # Returns (w, h) of tensor image _assert_image_tensor(img) return [img.shape[-1], img.shape[-2]] -def _get_image_num_channels(img: Tensor) -> int: +def get_image_num_channels(img: Tensor) -> int: if img.ndim == 2: return 1 elif img.ndim > 2: @@ -50,7 +50,7 @@ def _max_value(dtype: torch.dtype) -> float: def _assert_channels(img: Tensor, permitted: List[int]) -> None: - c = _get_image_num_channels(img) + c = get_image_num_channels(img) if c not in permitted: raise TypeError("Input image tensor permitted channel values are {}, but found {}".format(permitted, c)) @@ -122,7 +122,7 @@ def hflip(img: Tensor) -> Tensor: def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor: _assert_image_tensor(img) - w, h = _get_image_size(img) + w, h = get_image_size(img) right = left + width bottom = top + height @@ -187,7 +187,7 @@ def adjust_hue(img: Tensor, hue_factor: float) -> Tensor: _assert_image_tensor(img) _assert_channels(img, [1, 3]) - if _get_image_num_channels(img) == 1: # Match PIL behaviour + if get_image_num_channels(img) == 1: # Match PIL behaviour return img orig_dtype = img.dtype @@ -513,7 +513,7 @@ def resize( if antialias and interpolation not in ["bilinear", "bicubic"]: raise ValueError("Antialias option is supported for bilinear and bicubic interpolation modes only") - w, h = _get_image_size(img) + w, h = get_image_size(img) if isinstance(size, int) or len(size) == 1: # specified size only for the smallest edge short, long = (w, h) if w <= h else (h, w) @@ -586,7 +586,7 @@ def _assert_grid_transform_inputs( warnings.warn("Argument fill should be either int, float, tuple or list") # Check fill - num_channels = _get_image_num_channels(img) + num_channels = get_image_num_channels(img) if isinstance(fill, (tuple, list)) and (len(fill) > 1 and len(fill) != num_channels): msg = ("The number of elements in 'fill' cannot broadcast to match the number of " "channels of the image ({} != {})") diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py index 954d5f5f064..a5582b20a45 100644 --- 
a/torchvision/transforms/transforms.py +++ b/torchvision/transforms/transforms.py @@ -575,7 +575,7 @@ def get_params(img: Tensor, output_size: Tuple[int, int]) -> Tuple[int, int, int Returns: tuple: params (i, j, h, w) to be passed to ``crop`` for random crop. """ - w, h = F._get_image_size(img) + w, h = F.get_image_size(img) th, tw = output_size if h + 1 < th or w + 1 < tw: @@ -613,7 +613,7 @@ def forward(self, img): if self.padding is not None: img = F.pad(img, self.padding, self.fill, self.padding_mode) - width, height = F._get_image_size(img) + width, height = F.get_image_size(img) # pad the width if needed if self.pad_if_needed and width < self.size[1]: padding = [self.size[1] - width, 0] @@ -742,12 +742,12 @@ def forward(self, img): fill = self.fill if isinstance(img, Tensor): if isinstance(fill, (int, float)): - fill = [float(fill)] * F._get_image_num_channels(img) + fill = [float(fill)] * F.get_image_num_channels(img) else: fill = [float(f) for f in fill] if torch.rand(1) < self.p: - width, height = F._get_image_size(img) + width, height = F.get_image_size(img) startpoints, endpoints = self.get_params(width, height, self.distortion_scale) return F.perspective(img, startpoints, endpoints, self.interpolation, fill) return img @@ -858,7 +858,7 @@ def get_params( tuple: params (i, j, h, w) to be passed to ``crop`` for a random sized crop. """ - width, height = F._get_image_size(img) + width, height = F.get_image_size(img) area = height * width log_ratio = torch.log(torch.tensor(ratio)) @@ -1280,7 +1280,7 @@ def forward(self, img): fill = self.fill if isinstance(img, Tensor): if isinstance(fill, (int, float)): - fill = [float(fill)] * F._get_image_num_channels(img) + fill = [float(fill)] * F.get_image_num_channels(img) else: fill = [float(f) for f in fill] angle = self.get_params(self.degrees) @@ -1439,11 +1439,11 @@ def forward(self, img): fill = self.fill if isinstance(img, Tensor): if isinstance(fill, (int, float)): - fill = [float(fill)] * F._get_image_num_channels(img) + fill = [float(fill)] * F.get_image_num_channels(img) else: fill = [float(f) for f in fill] - img_size = F._get_image_size(img) + img_size = F.get_image_size(img) ret = self.get_params(self.degrees, self.translate, self.scale, self.shear, img_size) @@ -1529,7 +1529,7 @@ def forward(self, img): Returns: PIL Image or Tensor: Randomly grayscaled image. """ - num_output_channels = F._get_image_num_channels(img) + num_output_channels = F.get_image_num_channels(img) if torch.rand(1) < self.p: return F.rgb_to_grayscale(img, num_output_channels=num_output_channels) return img From a3d9c416c70ca9fe645678675a46dc0bfe6c5556 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Fri, 27 Aug 2021 09:50:48 +0100 Subject: [PATCH 11/12] Do not disable profiling executor in ONNX tests (#4324) [ghstack-poisoned] Co-authored-by: Bert Maher --- test/test_onnx.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/test/test_onnx.py b/test/test_onnx.py index ce0bc5c7b97..cd3239cef16 100644 --- a/test/test_onnx.py +++ b/test/test_onnx.py @@ -397,10 +397,6 @@ def test_faster_rcnn(self): # This test also compares both paste_masks_in_image and _onnx_paste_masks_in_image # (since jit_trace witll call _onnx_paste_masks_in_image). 
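The helpers made public in the previous patch dispatch on the input type and agree across PIL images and tensors; a small sketch mirroring the new `test_image_sizes` test above (sizes chosen arbitrarily):

```python
import torch
from PIL import Image
from torchvision.transforms import functional as F

pil_img = Image.new("RGB", (18, 16))   # PIL size is (width, height)
tensor_img = torch.rand(3, 16, 18)     # tensor layout is C x H x W

assert F.get_image_size(pil_img) == F.get_image_size(tensor_img) == [18, 16]
assert F.get_image_num_channels(pil_img) == F.get_image_num_channels(tensor_img) == 3
```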
def test_paste_mask_in_image(self): - # disable profiling - torch._C._jit_set_profiling_executor(False) - torch._C._jit_set_profiling_mode(False) - masks = torch.rand(10, 1, 26, 26) boxes = torch.rand(10, 4) boxes[:, 2:] += torch.rand(10, 2) @@ -452,10 +448,6 @@ def test_mask_rcnn(self): # This test also compares both heatmaps_to_keypoints and _onnx_heatmaps_to_keypoints # (since jit_trace witll call _heatmaps_to_keypoints). def test_heatmaps_to_keypoints(self): - # disable profiling - torch._C._jit_set_profiling_executor(False) - torch._C._jit_set_profiling_mode(False) - maps = torch.rand(10, 1, 26, 26) rois = torch.rand(10, 4) from torchvision.models.detection.roi_heads import heatmaps_to_keypoints From 2365bdc617f0d4dcd85ecd3311764bd8e58541ea Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 27 Aug 2021 14:44:37 +0200 Subject: [PATCH 12/12] Simplify bug report --- .github/ISSUE_TEMPLATE/bug-report.yml | 35 ++++++++------------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index ce9ae1a313a..3da5447d6a7 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -9,14 +9,9 @@ body: - type: textarea attributes: label: 🐛 Describe the bug - description: > - A clear and concise description of what the bug is. - validations: - required: true -- type: textarea - attributes: - label: Steps/Code to Reproduce description: | + A clear and concise description of what the bug is. + Please add a minimal example so that we can reproduce the error by running the code. It is very important for he snippet to be as succinct (minimal) as possible, so please take time to trim down any irrelevant code to help us debug efficiently. We are going to copy-paste your code and we expect to get the same result as you did: avoid any external data, and include the relevant imports, etc. For example: ```python @@ -33,28 +28,18 @@ body: ``` If the code is too long (hopefully, it isn't), feel free to put it in a public gist and link it in the issue: https://gist.github.com. + + Please also paste or describe the results you observe instead of the expected results. If you observe an error, please paste the error message including the **full** traceback of the exception. It may be relevant to wrap error messages in ```` ```triple quotes blocks``` ````. placeholder: | + + ```python Sample code to reproduce the problem ``` - validations: - required: true -- type: textarea - attributes: - label: Expected Results - description: > - Please paste or describe the expected results. - placeholder: > - Example: No error is thrown. - validations: - required: true -- type: textarea - attributes: - label: Actual Results - description: > - Please paste or describe the results you observe instead of the expected results. If you observe an error, please paste the error message including the **full** traceback of the exception. It may be relevant to wrap error messages in ```` ```triple quotes blocks``` ````. - placeholder: > - Please paste or specifically describe the actual output or traceback. + + ``` + The error message you got, with the full traceback. + ```` validations: required: true - type: textarea