From ef177c56f853f409bad11f07ac515a175555da0f Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Fri, 20 Aug 2021 14:15:51 +0100 Subject: [PATCH 01/12] Add StochasticDepth implementation (#4301) * Adding operator. * Adding tests * switching order of `p` and `mode`. * Remove seed setting. * Replace stats import with pytest.importorskip. * Fix doc * Apply suggestions from code review Co-authored-by: Francisco Massa * Fixing indentation. * Adding operator in the documentation. * Fixing lint Co-authored-by: Francisco Massa --- docs/source/ops.rst | 2 ++ test/test_ops.py | 28 +++++++++++++++ torchvision/ops/__init__.py | 3 +- torchvision/ops/stochastic_depth.py | 56 +++++++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 torchvision/ops/stochastic_depth.py diff --git a/docs/source/ops.rst b/docs/source/ops.rst index cdebe9721c3..ecef74dd8a6 100644 --- a/docs/source/ops.rst +++ b/docs/source/ops.rst @@ -23,6 +23,7 @@ torchvision.ops .. autofunction:: ps_roi_pool .. autofunction:: deform_conv2d .. autofunction:: sigmoid_focal_loss +.. autofunction:: stochastic_depth .. autoclass:: RoIAlign .. autoclass:: PSRoIAlign @@ -31,3 +32,4 @@ torchvision.ops .. autoclass:: DeformConv2d .. autoclass:: MultiScaleRoIAlign .. autoclass:: FeaturePyramidNetwork +.. autoclass:: StochasticDepth diff --git a/test/test_ops.py b/test/test_ops.py index 5c2fc882902..c64ba1fd0bb 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1000,5 +1000,33 @@ def gen_iou_check(box, expected, tolerance=1e-4): gen_iou_check(box_tensor, expected, tolerance=0.002 if dtype == torch.float16 else 1e-3) +class TestStochasticDepth: + @pytest.mark.parametrize('p', [0.2, 0.5, 0.8]) + @pytest.mark.parametrize('mode', ["batch", "row"]) + def test_stochastic_depth(self, mode, p): + stats = pytest.importorskip("scipy.stats") + batch_size = 5 + x = torch.ones(size=(batch_size, 3, 4, 4)) + layer = ops.StochasticDepth(p=p, mode=mode).to(device=x.device, dtype=x.dtype) + layer.__repr__() + + trials = 250 + num_samples = 0 + counts = 0 + for _ in range(trials): + out = layer(x) + non_zero_count = out.sum(dim=(1, 2, 3)).nonzero().size(0) + if mode == "batch": + if non_zero_count == 0: + counts += 1 + num_samples += 1 + elif mode == "row": + counts += batch_size - non_zero_count + num_samples += batch_size + + p_value = stats.binom_test(counts, num_samples, p=p) + assert p_value > 0.0001 + + if __name__ == '__main__': pytest.main([__file__]) diff --git a/torchvision/ops/__init__.py b/torchvision/ops/__init__.py index 0ec189dbc2a..606c27abcbe 100644 --- a/torchvision/ops/__init__.py +++ b/torchvision/ops/__init__.py @@ -8,6 +8,7 @@ from .poolers import MultiScaleRoIAlign from .feature_pyramid_network import FeaturePyramidNetwork from .focal_loss import sigmoid_focal_loss +from .stochastic_depth import stochastic_depth, StochasticDepth from ._register_onnx_ops import _register_custom_op @@ -20,5 +21,5 @@ 'box_area', 'box_iou', 'generalized_box_iou', 'roi_align', 'RoIAlign', 'roi_pool', 'RoIPool', 'ps_roi_align', 'PSRoIAlign', 'ps_roi_pool', 'PSRoIPool', 'MultiScaleRoIAlign', 'FeaturePyramidNetwork', - 'sigmoid_focal_loss' + 'sigmoid_focal_loss', 'stochastic_depth', 'StochasticDepth' ] diff --git a/torchvision/ops/stochastic_depth.py b/torchvision/ops/stochastic_depth.py new file mode 100644 index 00000000000..f3338242a76 --- /dev/null +++ b/torchvision/ops/stochastic_depth.py @@ -0,0 +1,56 @@ +import torch +from torch import nn, Tensor + + +def stochastic_depth(input: Tensor, p: float, mode: str, 
training: bool = True) -> Tensor: + """ + Implements the Stochastic Depth from `"Deep Networks with Stochastic Depth" + `_ used for randomly dropping residual + branches of residual architectures. + + Args: + input (Tensor[N, ...]): The input tensor or arbitrary dimensions with the first one + being its batch i.e. a batch with ``N`` rows. + p (float): probability of the input to be zeroed. + mode (str): ``"batch"`` or ``"row"``. + ``"batch"`` randomly zeroes the entire input, ``"row"`` zeroes + randomly selected rows from the batch. + training: apply stochastic depth if is ``True``. Default: ``True`` + + Returns: + Tensor[N, ...]: The randomly zeroed tensor. + """ + if p < 0.0 or p > 1.0: + raise ValueError("drop probability has to be between 0 and 1, but got {}".format(p)) + if not training or p == 0.0: + return input + + survival_rate = 1.0 - p + if mode not in ["batch", "row"]: + raise ValueError("mode has to be either 'batch' or 'row', but got {}".format(mode)) + size = [1] * input.ndim + if mode == "row": + size[0] = input.shape[0] + noise = torch.empty(size, dtype=input.dtype, device=input.device) + noise = noise.bernoulli_(survival_rate).div_(survival_rate) + return input * noise + + +class StochasticDepth(nn.Module): + """ + See :func:`stochastic_depth`. + """ + def __init__(self, p: float, mode: str) -> None: + super().__init__() + self.p = p + self.mode = mode + + def forward(self, input: Tensor) -> Tensor: + return stochastic_depth(input, self.p, self.mode, self.training) + + def __repr__(self) -> str: + tmpstr = self.__class__.__name__ + '(' + tmpstr += 'p=' + str(self.p) + tmpstr += ', mode=' + str(self.mode) + tmpstr += ')' + return tmpstr From 7947fc8fb38b1d3a2aca03f22a2e6a3caa63f2a0 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Sun, 22 Aug 2021 20:07:08 -0700 Subject: [PATCH 02/12] Prep change for changing def branch to main (#4306) --- .circleci/build_docs/commit_docs.sh | 4 +- .circleci/config.yml | 76 ++++++++++++++--------------- .circleci/config.yml.in | 8 +-- .circleci/regenerate.py | 4 +- .github/workflows/bandit.yml | 2 +- .github/workflows/codeql.yml | 2 +- CONTRIBUTING.md | 2 +- README.rst | 4 +- android/gradle.properties | 2 +- docs/source/conf.py | 4 +- test/test_cpp_models.py | 2 +- torchvision/__init__.py | 2 +- 12 files changed, 56 insertions(+), 56 deletions(-) diff --git a/.circleci/build_docs/commit_docs.sh b/.circleci/build_docs/commit_docs.sh index b923b0edbc4..04e3538fefc 100755 --- a/.circleci/build_docs/commit_docs.sh +++ b/.circleci/build_docs/commit_docs.sh @@ -6,7 +6,7 @@ set -ex if [ "$2" == "" ]; then echo call as "$0" "" "" echo where src is the root of the built documentation git checkout and - echo branch should be "master" or "1.7" or so + echo branch should be "main" or "1.7" or so exit 1 fi @@ -20,7 +20,7 @@ git checkout gh-pages mkdir -p ./"${target}" rm -rf ./"${target}"/* cp -r "${src}/docs/build/html/"* ./"$target" -if [ "${target}" == "master" ]; then +if [ "${target}" == "main" ]; then mkdir -p ./_static rm -rf ./_static/* cp -r "${src}/docs/build/html/_static/"* ./_static diff --git a/.circleci/config.yml b/.circleci/config.yml index 03f47e473ce..66c16edfe3e 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -29,7 +29,7 @@ commands: # command: | # set -ex # BRANCH=$(git rev-parse --abbrev-ref HEAD) -# if [[ "$BRANCH" != "master" ]]; then +# if [[ "$BRANCH" != "main" ]]; then # git fetch --force origin ${CIRCLE_BRANCH}/merge:merged/${CIRCLE_BRANCH} # git checkout "merged/$CIRCLE_BRANCH" # fi @@ -936,7 +936,7 @@ jobs: 
command: | set -ex tag=${CIRCLE_TAG:1:5} - VERSION=${tag:-master} + VERSION=${tag:-main} eval "$(./conda/bin/conda shell.bash hook)" conda activate ./env pushd docs @@ -982,7 +982,7 @@ jobs: # https://circleci.com/docs/2.0/configuration-reference/#checkout set -ex tag=${CIRCLE_TAG:1:5} - target=${tag:-master} + target=${tag:-main} ~/workspace/.circleci/build_docs/commit_docs.sh ~/workspace $target @@ -1159,7 +1159,7 @@ workflows: cu_version: cpu filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.6_cpu @@ -1168,7 +1168,7 @@ workflows: cu_version: cu102 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.6_cu102 @@ -1177,7 +1177,7 @@ workflows: cu_version: cu111 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.6_cu111 @@ -1186,7 +1186,7 @@ workflows: cu_version: cu113 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.6_cu113 @@ -1195,7 +1195,7 @@ workflows: cu_version: cpu filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.7_cpu @@ -1204,7 +1204,7 @@ workflows: cu_version: cu102 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.7_cu102 @@ -1213,7 +1213,7 @@ workflows: cu_version: cu111 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.7_cu111 @@ -1222,7 +1222,7 @@ workflows: cu_version: cu113 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.7_cu113 @@ -1231,7 +1231,7 @@ workflows: cu_version: cpu filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.8_cpu @@ -1240,7 +1240,7 @@ workflows: cu_version: cu102 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.8_cu102 @@ -1249,7 +1249,7 @@ workflows: cu_version: cu111 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.8_cu111 @@ -1258,7 +1258,7 @@ workflows: cu_version: cu113 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.8_cu113 @@ -1271,7 +1271,7 @@ workflows: cu_version: cu102 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.9_cu102 @@ -1280,7 +1280,7 @@ workflows: cu_version: cu111 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.9_cu111 @@ -1413,7 +1413,7 @@ workflows: cu_version: cpu filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.6_cpu @@ -1422,7 +1422,7 @@ workflows: cu_version: cu102 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.6_cu102 @@ -1431,7 +1431,7 @@ workflows: cu_version: cu111 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.6_cu111 @@ -1440,7 +1440,7 @@ workflows: cu_version: cu113 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.6_cu113 @@ -1449,7 +1449,7 
@@ workflows: cu_version: cpu filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.7_cpu @@ -1458,7 +1458,7 @@ workflows: cu_version: cu102 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.7_cu102 @@ -1467,7 +1467,7 @@ workflows: cu_version: cu111 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.7_cu111 @@ -1476,7 +1476,7 @@ workflows: cu_version: cu113 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.7_cu113 @@ -1485,7 +1485,7 @@ workflows: cu_version: cpu filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.8_cpu @@ -1494,7 +1494,7 @@ workflows: cu_version: cu102 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.8_cu102 @@ -1503,7 +1503,7 @@ workflows: cu_version: cu111 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.8_cu111 @@ -1512,7 +1512,7 @@ workflows: cu_version: cu113 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.8_cu113 @@ -1525,7 +1525,7 @@ workflows: cu_version: cu102 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.9_cu102 @@ -1534,7 +1534,7 @@ workflows: cu_version: cu111 filters: branches: - only: master + only: main tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.9_cu111 @@ -1609,7 +1609,7 @@ workflows: filters: branches: only: - - master + - main - nightly name: unittest_linux_gpu_py3.6 python_version: '3.6' @@ -1618,7 +1618,7 @@ workflows: filters: branches: only: - - master + - main - nightly name: unittest_linux_gpu_py3.7 python_version: '3.7' @@ -1631,7 +1631,7 @@ workflows: filters: branches: only: - - master + - main - nightly name: unittest_linux_gpu_py3.9 python_version: '3.9' @@ -1656,7 +1656,7 @@ workflows: filters: branches: only: - - master + - main - nightly name: unittest_windows_gpu_py3.6 python_version: '3.6' @@ -1665,7 +1665,7 @@ workflows: filters: branches: only: - - master + - main - nightly name: unittest_windows_gpu_py3.7 python_version: '3.7' @@ -1678,7 +1678,7 @@ workflows: filters: branches: only: - - master + - main - nightly name: unittest_windows_gpu_py3.9 python_version: '3.9' @@ -4075,7 +4075,7 @@ workflows: filters: branches: only: - - master + - main jobs: - smoke_test_docker_image_build: context: org-member diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in index 15cb7eb6a07..980332d3ea9 100644 --- a/.circleci/config.yml.in +++ b/.circleci/config.yml.in @@ -29,7 +29,7 @@ commands: # command: | # set -ex # BRANCH=$(git rev-parse --abbrev-ref HEAD) -# if [[ "$BRANCH" != "master" ]]; then +# if [[ "$BRANCH" != "main" ]]; then # git fetch --force origin ${CIRCLE_BRANCH}/merge:merged/${CIRCLE_BRANCH} # git checkout "merged/$CIRCLE_BRANCH" # fi @@ -936,7 +936,7 @@ jobs: command: | set -ex tag=${CIRCLE_TAG:1:5} - VERSION=${tag:-master} + VERSION=${tag:-main} eval "$(./conda/bin/conda shell.bash hook)" conda activate ./env pushd docs @@ -982,7 +982,7 @@ jobs: # https://circleci.com/docs/2.0/configuration-reference/#checkout set -ex tag=${CIRCLE_TAG:1:5} - target=${tag:-master} + target=${tag:-main} 
~/workspace/.circleci/build_docs/commit_docs.sh ~/workspace $target @@ -1029,7 +1029,7 @@ workflows: filters: branches: only: - - master + - main jobs: - smoke_test_docker_image_build: context: org-member diff --git a/.circleci/regenerate.py b/.circleci/regenerate.py index 157cabca433..7e2fa25cb9d 100755 --- a/.circleci/regenerate.py +++ b/.circleci/regenerate.py @@ -44,7 +44,7 @@ def build_workflows(prefix='', filter_branch=None, upload=False, indentation=6, if windows_latest_only and os_type == "win" and filter_branch is None and \ (python_version != python_versions[-1] or (cu_version not in [cu_versions[0], cu_versions[-1]])): - fb = "master" + fb = "main" if not fb and (os_type == 'linux' and cu_version == 'cpu' and btype == 'wheel' and @@ -241,7 +241,7 @@ def unittest_workflows(indentation=6): if device_type == 'gpu': if python_version != "3.8": - job['filters'] = gen_filter_branch_tree('master', 'nightly') + job['filters'] = gen_filter_branch_tree('main', 'nightly') job['cu_version'] = 'cu102' else: job['cu_version'] = 'cpu' diff --git a/.github/workflows/bandit.yml b/.github/workflows/bandit.yml index 93bae80f9bd..84200b438a9 100644 --- a/.github/workflows/bandit.yml +++ b/.github/workflows/bandit.yml @@ -4,7 +4,7 @@ name: Bandit on: pull_request: - branches: [ master ] + branches: [ main ] workflow_dispatch: diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 3c8bc96a5bd..99460b11228 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -4,7 +4,7 @@ name: CodeQL on: pull_request: - branches: [ master ] + branches: [ main ] workflow_dispatch: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 75405c94a83..55880ae5d70 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -60,7 +60,7 @@ conda install libpng jpeg If you plan to modify the code or documentation, please follow the steps below: -1. Fork the repository and create your branch from `master`. +1. Fork the repository and create your branch from `main`. 2. If you have modified the code (new feature or bug-fix), please add unit tests. 3. If you have changed APIs, update the documentation. Make sure the documentation builds. 4. Ensure the test suite passes. diff --git a/README.rst b/README.rst index 2c6daee4cc2..35cef0c19df 100644 --- a/README.rst +++ b/README.rst @@ -21,7 +21,7 @@ supported Python versions. +--------------------------+--------------------------+---------------------------------+ | ``torch`` | ``torchvision`` | ``python`` | +==========================+==========================+=================================+ -| ``master`` / ``nightly`` | ``master`` / ``nightly`` | ``>=3.6`` | +| ``master`` / ``nightly`` | ``main`` / ``nightly`` | ``>=3.6`` | +--------------------------+--------------------------+---------------------------------+ | ``1.9.0`` | ``0.10.0`` | ``>=3.6`` | +--------------------------+--------------------------+---------------------------------+ @@ -76,7 +76,7 @@ From source: In case building TorchVision from source fails, install the nightly version of PyTorch following -the linked guide on the `contributing page `_ and retry the install. +the linked guide on the `contributing page `_ and retry the install. By default, GPU support is built if CUDA is found and ``torch.cuda.is_available()`` is true. 
It's possible to force building GPU support by setting ``FORCE_CUDA=1`` environment variable, diff --git a/android/gradle.properties b/android/gradle.properties index a8105544f30..9c5b8f61212 100644 --- a/android/gradle.properties +++ b/android/gradle.properties @@ -9,7 +9,7 @@ POM_SCM_URL=https://github.com/pytorch/vision.git POM_SCM_CONNECTION=scm:git:https://github.com/pytorch/vision POM_SCM_DEV_CONNECTION=scm:git:git@github.com:pytorch/vision.git POM_LICENSE_NAME=BSD 3-Clause -POM_LICENSE_URL=https://github.com/pytorch/vision/blob/master/LICENSE +POM_LICENSE_URL=https://github.com/pytorch/vision/blob/main/LICENSE POM_ISSUES_URL=https://github.com/pytorch/vision/issues POM_LICENSE_DIST=repo POM_DEVELOPER_ID=pytorch diff --git a/docs/source/conf.py b/docs/source/conf.py index 6bbb05c13c7..e8e17edf283 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -82,10 +82,10 @@ # # The short X.Y version. # TODO: change to [:2] at v1.0 -version = 'master (' + torchvision.__version__ + ' )' +version = 'main (' + torchvision.__version__ + ' )' # The full version, including alpha/beta/rc tags. # TODO: verify this works as expected -release = 'master' +release = 'main' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/test/test_cpp_models.py b/test/test_cpp_models.py index 6deb5d79739..2307051ff60 100644 --- a/test/test_cpp_models.py +++ b/test/test_cpp_models.py @@ -45,7 +45,7 @@ def read_image2(): @unittest.skipIf( sys.platform == "darwin" or True, - "C++ models are broken on OS X at the moment, and there's a BC breakage on master; " + "C++ models are broken on OS X at the moment, and there's a BC breakage on main; " "see https://github.com/pytorch/vision/issues/1191") class Tester(unittest.TestCase): pretrained = False diff --git a/torchvision/__init__.py b/torchvision/__init__.py index 3cbdda7af7f..a5a6f568151 100644 --- a/torchvision/__init__.py +++ b/torchvision/__init__.py @@ -66,7 +66,7 @@ def set_video_backend(backend): It generally decodes faster than :mod:`pyav`, but is perhaps less robust. .. note:: - Building with FFMPEG is disabled by default in the latest master. If you want to use the 'video_reader' + Building with FFMPEG is disabled by default in the latest `main`. If you want to use the 'video_reader' backend, please compile torchvision from source. 
""" global _video_backend From 185be3a9811a9cc6dc6206a397d035be3c20649b Mon Sep 17 00:00:00 2001 From: F-G Fernandez Date: Mon, 23 Aug 2021 13:50:59 +0200 Subject: [PATCH 03/12] Added typing annotations to models/segmentation (#4227) * style: Added typing annotations to segmentation/_utils * style: Added typing annotations to segmentation/segmentation * style: Added typing annotations to remaining segmentation models * style: Fixed typing of DeepLab * style: Fixed typing * fix: Fixed typing annotations & default values * Fixing python_type_check --- torchvision/models/segmentation/_utils.py | 12 ++- torchvision/models/segmentation/deeplabv3.py | 19 ++--- torchvision/models/segmentation/fcn.py | 2 +- torchvision/models/segmentation/lraspp.py | 19 ++++- .../models/segmentation/segmentation.py | 76 +++++++++++++++---- 5 files changed, 97 insertions(+), 31 deletions(-) diff --git a/torchvision/models/segmentation/_utils.py b/torchvision/models/segmentation/_utils.py index 176b7490038..fb94b9b1528 100644 --- a/torchvision/models/segmentation/_utils.py +++ b/torchvision/models/segmentation/_utils.py @@ -1,19 +1,25 @@ from collections import OrderedDict +from typing import Optional, Dict -from torch import nn +from torch import nn, Tensor from torch.nn import functional as F class _SimpleSegmentationModel(nn.Module): __constants__ = ['aux_classifier'] - def __init__(self, backbone, classifier, aux_classifier=None): + def __init__( + self, + backbone: nn.Module, + classifier: nn.Module, + aux_classifier: Optional[nn.Module] = None + ) -> None: super(_SimpleSegmentationModel, self).__init__() self.backbone = backbone self.classifier = classifier self.aux_classifier = aux_classifier - def forward(self, x): + def forward(self, x: Tensor) -> Dict[str, Tensor]: input_shape = x.shape[-2:] # contract: features is a dict of tensors features = self.backbone(x) diff --git a/torchvision/models/segmentation/deeplabv3.py b/torchvision/models/segmentation/deeplabv3.py index 7acc013ccb1..15ab8846e7d 100644 --- a/torchvision/models/segmentation/deeplabv3.py +++ b/torchvision/models/segmentation/deeplabv3.py @@ -1,6 +1,7 @@ import torch from torch import nn from torch.nn import functional as F +from typing import List from ._utils import _SimpleSegmentationModel @@ -27,7 +28,7 @@ class DeepLabV3(_SimpleSegmentationModel): class DeepLabHead(nn.Sequential): - def __init__(self, in_channels, num_classes): + def __init__(self, in_channels: int, num_classes: int) -> None: super(DeepLabHead, self).__init__( ASPP(in_channels, [12, 24, 36]), nn.Conv2d(256, 256, 3, padding=1, bias=False), @@ -38,7 +39,7 @@ def __init__(self, in_channels, num_classes): class ASPPConv(nn.Sequential): - def __init__(self, in_channels, out_channels, dilation): + def __init__(self, in_channels: int, out_channels: int, dilation: int) -> None: modules = [ nn.Conv2d(in_channels, out_channels, 3, padding=dilation, dilation=dilation, bias=False), nn.BatchNorm2d(out_channels), @@ -48,14 +49,14 @@ def __init__(self, in_channels, out_channels, dilation): class ASPPPooling(nn.Sequential): - def __init__(self, in_channels, out_channels): + def __init__(self, in_channels: int, out_channels: int) -> None: super(ASPPPooling, self).__init__( nn.AdaptiveAvgPool2d(1), nn.Conv2d(in_channels, out_channels, 1, bias=False), nn.BatchNorm2d(out_channels), nn.ReLU()) - def forward(self, x): + def forward(self, x: torch.Tensor) -> torch.Tensor: size = x.shape[-2:] for mod in self: x = mod(x) @@ -63,7 +64,7 @@ def forward(self, x): class ASPP(nn.Module): - def 
__init__(self, in_channels, atrous_rates, out_channels=256): + def __init__(self, in_channels: int, atrous_rates: List[int], out_channels: int = 256) -> None: super(ASPP, self).__init__() modules = [] modules.append(nn.Sequential( @@ -85,9 +86,9 @@ def __init__(self, in_channels, atrous_rates, out_channels=256): nn.ReLU(), nn.Dropout(0.5)) - def forward(self, x): - res = [] + def forward(self, x: torch.Tensor) -> torch.Tensor: + _res = [] for conv in self.convs: - res.append(conv(x)) - res = torch.cat(res, dim=1) + _res.append(conv(x)) + res = torch.cat(_res, dim=1) return self.project(res) diff --git a/torchvision/models/segmentation/fcn.py b/torchvision/models/segmentation/fcn.py index 3c695b53167..9c8db1e1211 100644 --- a/torchvision/models/segmentation/fcn.py +++ b/torchvision/models/segmentation/fcn.py @@ -23,7 +23,7 @@ class FCN(_SimpleSegmentationModel): class FCNHead(nn.Sequential): - def __init__(self, in_channels, channels): + def __init__(self, in_channels: int, channels: int) -> None: inter_channels = in_channels // 4 layers = [ nn.Conv2d(in_channels, inter_channels, 3, padding=1, bias=False), diff --git a/torchvision/models/segmentation/lraspp.py b/torchvision/models/segmentation/lraspp.py index 44cd9b1e773..0e5fb5ee898 100644 --- a/torchvision/models/segmentation/lraspp.py +++ b/torchvision/models/segmentation/lraspp.py @@ -24,12 +24,19 @@ class LRASPP(nn.Module): inter_channels (int, optional): the number of channels for intermediate computations. """ - def __init__(self, backbone, low_channels, high_channels, num_classes, inter_channels=128): + def __init__( + self, + backbone: nn.Module, + low_channels: int, + high_channels: int, + num_classes: int, + inter_channels: int = 128 + ) -> None: super().__init__() self.backbone = backbone self.classifier = LRASPPHead(low_channels, high_channels, num_classes, inter_channels) - def forward(self, input): + def forward(self, input: Tensor) -> Dict[str, Tensor]: features = self.backbone(input) out = self.classifier(features) out = F.interpolate(out, size=input.shape[-2:], mode='bilinear', align_corners=False) @@ -42,7 +49,13 @@ def forward(self, input): class LRASPPHead(nn.Module): - def __init__(self, low_channels, high_channels, num_classes, inter_channels): + def __init__( + self, + low_channels: int, + high_channels: int, + num_classes: int, + inter_channels: int + ) -> None: super().__init__() self.cbr = nn.Sequential( nn.Conv2d(high_channels, inter_channels, 1, bias=False), diff --git a/torchvision/models/segmentation/segmentation.py b/torchvision/models/segmentation/segmentation.py index 0f2f14c97ba..938965e330b 100644 --- a/torchvision/models/segmentation/segmentation.py +++ b/torchvision/models/segmentation/segmentation.py @@ -1,3 +1,5 @@ +from torch import nn +from typing import Any, Optional from .._utils import IntermediateLayerGetter from ..._internally_replaced_utils import load_state_dict_from_url from .. 
import mobilenetv3 @@ -22,7 +24,13 @@ } -def _segm_model(name, backbone_name, num_classes, aux, pretrained_backbone=True): +def _segm_model( + name: str, + backbone_name: str, + num_classes: int, + aux: Optional[bool], + pretrained_backbone: bool = True +) -> nn.Module: if 'resnet' in backbone_name: backbone = resnet.__dict__[backbone_name]( pretrained=pretrained_backbone, @@ -66,7 +74,15 @@ def _segm_model(name, backbone_name, num_classes, aux, pretrained_backbone=True) return model -def _load_model(arch_type, backbone, pretrained, progress, num_classes, aux_loss, **kwargs): +def _load_model( + arch_type: str, + backbone: str, + pretrained: bool, + progress: bool, + num_classes: int, + aux_loss: Optional[bool], + **kwargs: Any +) -> nn.Module: if pretrained: aux_loss = True kwargs["pretrained_backbone"] = False @@ -76,7 +92,7 @@ def _load_model(arch_type, backbone, pretrained, progress, num_classes, aux_loss return model -def _load_weights(model, arch_type, backbone, progress): +def _load_weights(model: nn.Module, arch_type: str, backbone: str, progress: bool) -> None: arch = arch_type + '_' + backbone + '_coco' model_url = model_urls.get(arch, None) if model_url is None: @@ -86,7 +102,7 @@ def _load_weights(model, arch_type, backbone, progress): model.load_state_dict(state_dict) -def _segm_lraspp_mobilenetv3(backbone_name, num_classes, pretrained_backbone=True): +def _segm_lraspp_mobilenetv3(backbone_name: str, num_classes: int, pretrained_backbone: bool = True) -> LRASPP: backbone = mobilenetv3.__dict__[backbone_name](pretrained=pretrained_backbone, dilated=True).features # Gather the indices of blocks which are strided. These are the locations of C1, ..., Cn-1 blocks. @@ -103,8 +119,13 @@ def _segm_lraspp_mobilenetv3(backbone_name, num_classes, pretrained_backbone=Tru return model -def fcn_resnet50(pretrained=False, progress=True, - num_classes=21, aux_loss=None, **kwargs): +def fcn_resnet50( + pretrained: bool = False, + progress: bool = True, + num_classes: int = 21, + aux_loss: Optional[bool] = None, + **kwargs: Any +) -> nn.Module: """Constructs a Fully-Convolutional Network model with a ResNet-50 backbone. Args: @@ -117,8 +138,13 @@ def fcn_resnet50(pretrained=False, progress=True, return _load_model('fcn', 'resnet50', pretrained, progress, num_classes, aux_loss, **kwargs) -def fcn_resnet101(pretrained=False, progress=True, - num_classes=21, aux_loss=None, **kwargs): +def fcn_resnet101( + pretrained: bool = False, + progress: bool = True, + num_classes: int = 21, + aux_loss: Optional[bool] = None, + **kwargs: Any +) -> nn.Module: """Constructs a Fully-Convolutional Network model with a ResNet-101 backbone. Args: @@ -131,8 +157,13 @@ def fcn_resnet101(pretrained=False, progress=True, return _load_model('fcn', 'resnet101', pretrained, progress, num_classes, aux_loss, **kwargs) -def deeplabv3_resnet50(pretrained=False, progress=True, - num_classes=21, aux_loss=None, **kwargs): +def deeplabv3_resnet50( + pretrained: bool = False, + progress: bool = True, + num_classes: int = 21, + aux_loss: Optional[bool] = None, + **kwargs: Any +) -> nn.Module: """Constructs a DeepLabV3 model with a ResNet-50 backbone. 
Args: @@ -145,8 +176,13 @@ def deeplabv3_resnet50(pretrained=False, progress=True, return _load_model('deeplabv3', 'resnet50', pretrained, progress, num_classes, aux_loss, **kwargs) -def deeplabv3_resnet101(pretrained=False, progress=True, - num_classes=21, aux_loss=None, **kwargs): +def deeplabv3_resnet101( + pretrained: bool = False, + progress: bool = True, + num_classes: int = 21, + aux_loss: Optional[bool] = None, + **kwargs: Any +) -> nn.Module: """Constructs a DeepLabV3 model with a ResNet-101 backbone. Args: @@ -159,8 +195,13 @@ def deeplabv3_resnet101(pretrained=False, progress=True, return _load_model('deeplabv3', 'resnet101', pretrained, progress, num_classes, aux_loss, **kwargs) -def deeplabv3_mobilenet_v3_large(pretrained=False, progress=True, - num_classes=21, aux_loss=None, **kwargs): +def deeplabv3_mobilenet_v3_large( + pretrained: bool = False, + progress: bool = True, + num_classes: int = 21, + aux_loss: Optional[bool] = None, + **kwargs: Any +) -> nn.Module: """Constructs a DeepLabV3 model with a MobileNetV3-Large backbone. Args: @@ -173,7 +214,12 @@ def deeplabv3_mobilenet_v3_large(pretrained=False, progress=True, return _load_model('deeplabv3', 'mobilenet_v3_large', pretrained, progress, num_classes, aux_loss, **kwargs) -def lraspp_mobilenet_v3_large(pretrained=False, progress=True, num_classes=21, **kwargs): +def lraspp_mobilenet_v3_large( + pretrained: bool = False, + progress: bool = True, + num_classes: int = 21, + **kwargs: Any +) -> nn.Module: """Constructs a Lite R-ASPP Network model with a MobileNetV3-Large backbone. Args: From 11d36292b84c640b23f446b862ba4992e51b5e63 Mon Sep 17 00:00:00 2001 From: F-G Fernandez Date: Mon, 23 Aug 2021 16:51:35 +0200 Subject: [PATCH 04/12] Added typing annotations to models/video (#4229) * style: Added typing to models/video * style: Fixed typing * style: Fixed typing * style: Fixed typing * refactor: Removed default value for stem * docs: Fixed docstring of VideoResNet * style: Refactored typing * docs: Fixed docstring * style: Fixed typing * docs: Specified docstring * typing: Fixed tying * docs: Fixed docstring * Undoing change. 
--- torchvision/models/video/resnet.py | 120 +++++++++++++++++++---------- 1 file changed, 78 insertions(+), 42 deletions(-) diff --git a/torchvision/models/video/resnet.py b/torchvision/models/video/resnet.py index fc69188ef7a..faf3b3bc4a8 100644 --- a/torchvision/models/video/resnet.py +++ b/torchvision/models/video/resnet.py @@ -1,4 +1,6 @@ +from torch import Tensor import torch.nn as nn +from typing import Tuple, Optional, Callable, List, Type, Any, Union from ..._internally_replaced_utils import load_state_dict_from_url @@ -13,12 +15,14 @@ class Conv3DSimple(nn.Conv3d): - def __init__(self, - in_planes, - out_planes, - midplanes=None, - stride=1, - padding=1): + def __init__( + self, + in_planes: int, + out_planes: int, + midplanes: Optional[int] = None, + stride: int = 1, + padding: int = 1 + ) -> None: super(Conv3DSimple, self).__init__( in_channels=in_planes, @@ -29,18 +33,20 @@ def __init__(self, bias=False) @staticmethod - def get_downsample_stride(stride): + def get_downsample_stride(stride: int) -> Tuple[int, int, int]: return stride, stride, stride class Conv2Plus1D(nn.Sequential): - def __init__(self, - in_planes, - out_planes, - midplanes, - stride=1, - padding=1): + def __init__( + self, + in_planes: int, + out_planes: int, + midplanes: int, + stride: int = 1, + padding: int = 1 + ) -> None: super(Conv2Plus1D, self).__init__( nn.Conv3d(in_planes, midplanes, kernel_size=(1, 3, 3), stride=(1, stride, stride), padding=(0, padding, padding), @@ -52,18 +58,20 @@ def __init__(self, bias=False)) @staticmethod - def get_downsample_stride(stride): + def get_downsample_stride(stride: int) -> Tuple[int, int, int]: return stride, stride, stride class Conv3DNoTemporal(nn.Conv3d): - def __init__(self, - in_planes, - out_planes, - midplanes=None, - stride=1, - padding=1): + def __init__( + self, + in_planes: int, + out_planes: int, + midplanes: Optional[int] = None, + stride: int = 1, + padding: int = 1 + ) -> None: super(Conv3DNoTemporal, self).__init__( in_channels=in_planes, @@ -74,7 +82,7 @@ def __init__(self, bias=False) @staticmethod - def get_downsample_stride(stride): + def get_downsample_stride(stride: int) -> Tuple[int, int, int]: return 1, stride, stride @@ -82,7 +90,14 @@ class BasicBlock(nn.Module): expansion = 1 - def __init__(self, inplanes, planes, conv_builder, stride=1, downsample=None): + def __init__( + self, + inplanes: int, + planes: int, + conv_builder: Callable[..., nn.Module], + stride: int = 1, + downsample: Optional[nn.Module] = None, + ) -> None: midplanes = (inplanes * planes * 3 * 3 * 3) // (inplanes * 3 * 3 + 3 * planes) super(BasicBlock, self).__init__() @@ -99,7 +114,7 @@ def __init__(self, inplanes, planes, conv_builder, stride=1, downsample=None): self.downsample = downsample self.stride = stride - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: residual = x out = self.conv1(x) @@ -116,7 +131,14 @@ def forward(self, x): class Bottleneck(nn.Module): expansion = 4 - def __init__(self, inplanes, planes, conv_builder, stride=1, downsample=None): + def __init__( + self, + inplanes: int, + planes: int, + conv_builder: Callable[..., nn.Module], + stride: int = 1, + downsample: Optional[nn.Module] = None, + ) -> None: super(Bottleneck, self).__init__() midplanes = (inplanes * planes * 3 * 3 * 3) // (inplanes * 3 * 3 + 3 * planes) @@ -143,7 +165,7 @@ def __init__(self, inplanes, planes, conv_builder, stride=1, downsample=None): self.downsample = downsample self.stride = stride - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: 
residual = x out = self.conv1(x) @@ -162,7 +184,7 @@ def forward(self, x): class BasicStem(nn.Sequential): """The default conv-batchnorm-relu stem """ - def __init__(self): + def __init__(self) -> None: super(BasicStem, self).__init__( nn.Conv3d(3, 64, kernel_size=(3, 7, 7), stride=(1, 2, 2), padding=(1, 3, 3), bias=False), @@ -173,7 +195,7 @@ def __init__(self): class R2Plus1dStem(nn.Sequential): """R(2+1)D stem is different than the default one as it uses separated 3D convolution """ - def __init__(self): + def __init__(self) -> None: super(R2Plus1dStem, self).__init__( nn.Conv3d(3, 45, kernel_size=(1, 7, 7), stride=(1, 2, 2), padding=(0, 3, 3), @@ -189,16 +211,23 @@ def __init__(self): class VideoResNet(nn.Module): - def __init__(self, block, conv_makers, layers, - stem, num_classes=400, - zero_init_residual=False): + def __init__( + self, + block: Type[Union[BasicBlock, Bottleneck]], + conv_makers: List[Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]]], + layers: List[int], + stem: Callable[..., nn.Module], + num_classes: int = 400, + zero_init_residual: bool = False, + ) -> None: """Generic resnet video generator. Args: - block (nn.Module): resnet building block - conv_makers (list(functions)): generator function for each layer + block (Type[Union[BasicBlock, Bottleneck]]): resnet building block + conv_makers (List[Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]]]): generator + function for each layer layers (List[int]): number of blocks per layer - stem (nn.Module, optional): Resnet stem, if None, defaults to conv-bn-relu. Defaults to None. + stem (Callable[..., nn.Module]): module specifying the ResNet stem. num_classes (int, optional): Dimension of the final FC layer. Defaults to 400. zero_init_residual (bool, optional): Zero init bottleneck residual BN. Defaults to False. 
""" @@ -221,9 +250,9 @@ def __init__(self, block, conv_makers, layers, if zero_init_residual: for m in self.modules(): if isinstance(m, Bottleneck): - nn.init.constant_(m.bn3.weight, 0) + nn.init.constant_(m.bn3.weight, 0) # type: ignore[union-attr, arg-type] - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: x = self.stem(x) x = self.layer1(x) @@ -238,7 +267,14 @@ def forward(self, x): return x - def _make_layer(self, block, conv_builder, planes, blocks, stride=1): + def _make_layer( + self, + block: Type[Union[BasicBlock, Bottleneck]], + conv_builder: Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]], + planes: int, + blocks: int, + stride: int = 1 + ) -> nn.Sequential: downsample = None if stride != 1 or self.inplanes != planes * block.expansion: @@ -257,7 +293,7 @@ def _make_layer(self, block, conv_builder, planes, blocks, stride=1): return nn.Sequential(*layers) - def _initialize_weights(self): + def _initialize_weights(self) -> None: for m in self.modules(): if isinstance(m, nn.Conv3d): nn.init.kaiming_normal_(m.weight, mode='fan_out', @@ -272,7 +308,7 @@ def _initialize_weights(self): nn.init.constant_(m.bias, 0) -def _video_resnet(arch, pretrained=False, progress=True, **kwargs): +def _video_resnet(arch: str, pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VideoResNet: model = VideoResNet(**kwargs) if pretrained: @@ -282,7 +318,7 @@ def _video_resnet(arch, pretrained=False, progress=True, **kwargs): return model -def r3d_18(pretrained=False, progress=True, **kwargs): +def r3d_18(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VideoResNet: """Construct 18 layer Resnet3D model as in https://arxiv.org/abs/1711.11248 @@ -302,7 +338,7 @@ def r3d_18(pretrained=False, progress=True, **kwargs): stem=BasicStem, **kwargs) -def mc3_18(pretrained=False, progress=True, **kwargs): +def mc3_18(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VideoResNet: """Constructor for 18 layer Mixed Convolution network as in https://arxiv.org/abs/1711.11248 @@ -316,12 +352,12 @@ def mc3_18(pretrained=False, progress=True, **kwargs): return _video_resnet('mc3_18', pretrained, progress, block=BasicBlock, - conv_makers=[Conv3DSimple] + [Conv3DNoTemporal] * 3, + conv_makers=[Conv3DSimple] + [Conv3DNoTemporal] * 3, # type: ignore[list-item] layers=[2, 2, 2, 2], stem=BasicStem, **kwargs) -def r2plus1d_18(pretrained=False, progress=True, **kwargs): +def r2plus1d_18(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> VideoResNet: """Constructor for the 18 layer deep R(2+1)D network as in https://arxiv.org/abs/1711.11248 From b72129c5338cf3e888b1ce5dbdc0f4cb31a653cf Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 23 Aug 2021 19:12:22 +0200 Subject: [PATCH 05/12] fix dependency table (#4308) --- README.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/README.rst b/README.rst index 35cef0c19df..bca61ce4680 100644 --- a/README.rst +++ b/README.rst @@ -21,25 +21,25 @@ supported Python versions. 
+--------------------------+--------------------------+---------------------------------+ | ``torch`` | ``torchvision`` | ``python`` | +==========================+==========================+=================================+ -| ``master`` / ``nightly`` | ``main`` / ``nightly`` | ``>=3.6`` | +| ``main`` / ``nightly`` | ``main`` / ``nightly`` | ``>=3.6``, ``<=3.9`` | +--------------------------+--------------------------+---------------------------------+ -| ``1.9.0`` | ``0.10.0`` | ``>=3.6`` | +| ``1.9.0`` | ``0.10.0`` | ``>=3.6``, ``<=3.9`` | +--------------------------+--------------------------+---------------------------------+ -| ``1.8.1`` | ``0.9.1`` | ``>=3.6`` | +| ``1.8.1`` | ``0.9.1`` | ``>=3.6``, ``<=3.9`` | +--------------------------+--------------------------+---------------------------------+ -| ``1.8.0`` | ``0.9.0`` | ``>=3.6`` | +| ``1.8.0`` | ``0.9.0`` | ``>=3.6``, ``<=3.9`` | +--------------------------+--------------------------+---------------------------------+ -| ``1.7.1`` | ``0.8.2`` | ``>=3.6`` | +| ``1.7.1`` | ``0.8.2`` | ``>=3.6``, ``<=3.9`` | +--------------------------+--------------------------+---------------------------------+ -| ``1.7.0`` | ``0.8.1`` | ``>=3.6`` | +| ``1.7.0`` | ``0.8.1`` | ``>=3.6``, ``<=3.8`` | +--------------------------+--------------------------+---------------------------------+ -| ``1.7.0`` | ``0.8.0`` | ``>=3.6`` | +| ``1.7.0`` | ``0.8.0`` | ``>=3.6``, ``<=3.8`` | +--------------------------+--------------------------+---------------------------------+ -| ``1.6.0`` | ``0.7.0`` | ``>=3.6`` | +| ``1.6.0`` | ``0.7.0`` | ``>=3.6``, ``<=3.8`` | +--------------------------+--------------------------+---------------------------------+ -| ``1.5.1`` | ``0.6.1`` | ``>=3.5`` | +| ``1.5.1`` | ``0.6.1`` | ``>=3.5``, ``<=3.8`` | +--------------------------+--------------------------+---------------------------------+ -| ``1.5.0`` | ``0.6.0`` | ``>=3.5`` | +| ``1.5.0`` | ``0.6.0`` | ``>=3.5``, ``<=3.8`` | +--------------------------+--------------------------+---------------------------------+ | ``1.4.0`` | ``0.5.0`` | ``==2.7``, ``>=3.5``, ``<=3.8`` | +--------------------------+--------------------------+---------------------------------+ From 32f95c77f7cc6daa750221b37f14ded9973b886c Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 26 Aug 2021 09:16:20 +0100 Subject: [PATCH 06/12] [FBcode->GH] Port quantize_val and dequantize_val into torchvision to avoid at::native and android xplat incompatibility (#4311) Summary: This diff ports `quantize_val` and `dequantize_val` from at::native to torchvision because native kernels are incompatible with android xplat builds (see D30234056). This should only be temporary until we find a way to move those functions out of at::native, or until the at::native / android incompatibility disappears. 
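For reference, the two vendored helpers implement plain affine quantization. Below is a minimal Python sketch of the non-FBGEMM path that appears further down in this patch; the Python function names simply mirror the C++ ones and are illustrative, with quint8-style qmin/qmax assumed.

```python
def quantize_val(scale: float, zero_point: int, value: float,
                 qmin: int = 0, qmax: int = 255) -> int:
    # Round to nearest (ties to even), matching std::nearbyint under the
    # default rounding mode, then shift by the zero point and clamp.
    qvalue = zero_point + round(value / scale)
    return max(qmin, min(qmax, qvalue))


def dequantize_val(scale: float, zero_point: int, qvalue: int) -> float:
    # Undo the zero-point shift and rescale back to the real line.
    return (qvalue - zero_point) * scale


# e.g. with scale=0.1 and zero_point=128: 1.0 -> 138 -> 1.0
assert quantize_val(0.1, 128, 1.0) == 138
assert dequantize_val(0.1, 128, 138) == 1.0
```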
Reviewed By: fmassa Differential Revision: D30393619 fbshipit-source-id: 18b7b1b349ad9a24088a120e23da7535f7fa7ddc Co-authored-by: Nicolas Hug --- .../ops/quantized/cpu/qroi_align_kernel.cpp | 96 +++++++++++++++++-- 1 file changed, 89 insertions(+), 7 deletions(-) diff --git a/torchvision/csrc/ops/quantized/cpu/qroi_align_kernel.cpp b/torchvision/csrc/ops/quantized/cpu/qroi_align_kernel.cpp index cfd5ec4ee97..20d4f503501 100644 --- a/torchvision/csrc/ops/quantized/cpu/qroi_align_kernel.cpp +++ b/torchvision/csrc/ops/quantized/cpu/qroi_align_kernel.cpp @@ -1,5 +1,4 @@ #include -#include #include #include "../../cpu/roi_align_common.h" @@ -9,6 +8,90 @@ namespace ops { namespace { +// BEGIN copy-pasted code from pytorch core +// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/quantized/affine_quantizer_base.cpp +// We're vendoring the quantize_val() and dequantize_val() functions here. The +// reason is that these functions belong in at::native, which is incompatible +// with android xplat support. + +// FIXME: Remove this section once we can use at::native for android xplat +// builds, or when quantize_val() and dequantize_val() aren't in at::native + +#ifdef USE_FBGEMM +template +T quantize_val(double scale, int64_t zero_point, float value) { + // Internally, fbgemm::Quantize uses std::nearbyint. + // std::nearbyint results in nearest integer value according to the current + // rounding mode and the default rounding mode is rounds to even in half-way + // cases in most popular processor architectures like x86 and ARM. This is + // typically faster than an alternatives like std::round that rounds half-way + // cases away from zero, and can be consistent with SIMD implementations for + // example in x86 using _mm512_cvtps_epi32 or mm512_round_ps with + // _MM_FROUND_CUR_DIRECTION option that also follow the current rounding mode. + // NOLINTNEXTLINE(cppcoreguidelines-init-variables) + int32_t qvalue; + // NOLINTNEXTLINE(bugprone-signed-char-misuse) + qvalue = fbgemm::Quantize( + value, + static_cast(zero_point), + static_cast(scale), + /*result_precision=*/CHAR_BIT * sizeof(typename T::underlying)); + return static_cast(qvalue); +} + +template +inline float dequantize_val(double scale, int64_t zero_point, T value) { + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) + fbgemm::TensorQuantizationParams qparams; + qparams.scale = static_cast(scale); + qparams.zero_point = static_cast(zero_point); + return fbgemm::Dequantize(value.val_, qparams); +} +#else // USE_FBGEMM + +#if defined(__ANDROID__) && !defined(__NDK_MAJOR__) +template +inline float Round(const float x) { + return ::nearbyintf(x); +} +inline double Round(const double x) { + return ::nearbyint(x); +} +#else +template +inline T Round(const T x) { + return std::nearbyint(x); +} +#endif + +template +T quantize_val(double scale, int64_t zero_point, float value) { + // std::nearbyint results in nearest integer value according to the current + // rounding mode and the default rounding mode is rounds to even in half-way + // cases in most popular processor architectures like x86 and ARM. This is + // typically faster than an alternatives like std::round that rounds half-way + // cases away from zero, and can be consistent with SIMD implementations for + // example in x86 using _mm512_cvtps_epi32 or mm512_round_ps with + // _MM_FROUND_CUR_DIRECTION option that also follow the current rounding mode. 
+ int64_t qvalue; + constexpr int64_t qmin = std::numeric_limits::min(); + constexpr int64_t qmax = std::numeric_limits::max(); + float inv_scale = 1.0f / static_cast(scale); + qvalue = static_cast(zero_point + Round(value * inv_scale)); + qvalue = std::max(qvalue, qmin); + qvalue = std::min(qvalue, qmax); + return static_cast(qvalue); +} + +template +float dequantize_val(double scale, int64_t zero_point, T value) { + // We need to convert the qint8 value to float to ensure the subtraction + // subexpression returns a float + return (static_cast(value.val_) - zero_point) * scale; +} +#endif // USE_FBGEMM +// END copy-pasted code from pytorch core + template void qroi_align_forward_kernel_impl( int n_rois, @@ -46,19 +129,19 @@ void qroi_align_forward_kernel_impl( // Do not using rounding; this implementation detail is critical float offset = aligned ? 0.5 : 0.; float roi_start_w = - at::native::dequantize_val(rois_scale, rois_zp, offset_rois[1]) * + dequantize_val(rois_scale, rois_zp, offset_rois[1]) * spatial_scale - offset; float roi_start_h = - at::native::dequantize_val(rois_scale, rois_zp, offset_rois[2]) * + dequantize_val(rois_scale, rois_zp, offset_rois[2]) * spatial_scale - offset; float roi_end_w = - at::native::dequantize_val(rois_scale, rois_zp, offset_rois[3]) * + dequantize_val(rois_scale, rois_zp, offset_rois[3]) * spatial_scale - offset; float roi_end_h = - at::native::dequantize_val(rois_scale, rois_zp, offset_rois[4]) * + dequantize_val(rois_scale, rois_zp, offset_rois[4]) * spatial_scale - offset; @@ -134,8 +217,7 @@ void qroi_align_forward_kernel_impl( output_val /= count; // Average pooling - output[index] = - at::native::quantize_val(input_scale, input_zp, output_val); + output[index] = quantize_val(input_scale, input_zp, output_val); } // for pw } // for ph } // for c From 7a0184aa2dc8cb0ec2fd4f7cfe79dfb3717ee302 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 26 Aug 2021 09:47:13 +0100 Subject: [PATCH 07/12] [FBcode->GH] Moving logging on base class. 
(#4319) --- torchvision/datasets/cifar.py | 1 - torchvision/datasets/mnist.py | 1 - torchvision/datasets/vision.py | 1 + 3 files changed, 1 insertion(+), 2 deletions(-) diff --git a/torchvision/datasets/cifar.py b/torchvision/datasets/cifar.py index 47b2bd41fb0..17a2b5ee9cd 100644 --- a/torchvision/datasets/cifar.py +++ b/torchvision/datasets/cifar.py @@ -59,7 +59,6 @@ def __init__( super(CIFAR10, self).__init__(root, transform=transform, target_transform=target_transform) - torch._C._log_api_usage_once(f"torchvision.datasets.{self.__class__.__name__}") self.train = train # training set or test set diff --git a/torchvision/datasets/mnist.py b/torchvision/datasets/mnist.py index 0467854e9b3..237a135722f 100644 --- a/torchvision/datasets/mnist.py +++ b/torchvision/datasets/mnist.py @@ -77,7 +77,6 @@ def __init__( ) -> None: super(MNIST, self).__init__(root, transform=transform, target_transform=target_transform) - torch._C._log_api_usage_once(f"torchvision.datasets.{self.__class__.__name__}") self.train = train # training set or test set if self._check_legacy_exist(): diff --git a/torchvision/datasets/vision.py b/torchvision/datasets/vision.py index 2cc9ce14cb1..db44a8b1ba0 100644 --- a/torchvision/datasets/vision.py +++ b/torchvision/datasets/vision.py @@ -31,6 +31,7 @@ def __init__( transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, ) -> None: + torch._C._log_api_usage_once(f"torchvision.datasets.{self.__class__.__name__}") if isinstance(root, torch._six.string_classes): root = os.path.expanduser(root) self.root = root From d004d7798e7874179e9be5671244ca8b3f2760ec Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 26 Aug 2021 10:38:45 +0100 Subject: [PATCH 08/12] Fix broken clang format test. (#4320) --- .../csrc/ops/quantized/cpu/qroi_align_kernel.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/torchvision/csrc/ops/quantized/cpu/qroi_align_kernel.cpp b/torchvision/csrc/ops/quantized/cpu/qroi_align_kernel.cpp index 20d4f503501..15f468b31e7 100644 --- a/torchvision/csrc/ops/quantized/cpu/qroi_align_kernel.cpp +++ b/torchvision/csrc/ops/quantized/cpu/qroi_align_kernel.cpp @@ -129,20 +129,16 @@ void qroi_align_forward_kernel_impl( // Do not using rounding; this implementation detail is critical float offset = aligned ? 0.5 : 0.; float roi_start_w = - dequantize_val(rois_scale, rois_zp, offset_rois[1]) * - spatial_scale - + dequantize_val(rois_scale, rois_zp, offset_rois[1]) * spatial_scale - offset; float roi_start_h = - dequantize_val(rois_scale, rois_zp, offset_rois[2]) * - spatial_scale - + dequantize_val(rois_scale, rois_zp, offset_rois[2]) * spatial_scale - offset; float roi_end_w = - dequantize_val(rois_scale, rois_zp, offset_rois[3]) * - spatial_scale - + dequantize_val(rois_scale, rois_zp, offset_rois[3]) * spatial_scale - offset; float roi_end_h = - dequantize_val(rois_scale, rois_zp, offset_rois[4]) * - spatial_scale - + dequantize_val(rois_scale, rois_zp, offset_rois[4]) * spatial_scale - offset; float roi_width = roi_end_w - roi_start_w; From 37a9ee5b3aead821dc1f795ec9274ccbeea695bb Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 26 Aug 2021 11:03:37 +0100 Subject: [PATCH 09/12] Add EfficientNet Architecture in TorchVision (#4293) * Adding code skeleton * Adding MBConvConfig. * Extend SqueezeExcitation to support custom min_value and activation. * Implement MBConv. * Replace stochastic_depth with operator. 
* Adding the rest of the EfficientNet implementation * Update torchvision/models/efficientnet.py * Replacing 1st activation of SE with SiLU. * Adding efficientnet_b3. * Replace mobilenetv3 assets with custom. * Switch to standard sigmoid and reconfiguring BN. * Reconfiguration of efficientnet. * Add repr * Add weights. * Update weights. * Adding B5-B7 weights. * Update docs and hubconf. * Fix doc link. * Fix typo on comment. --- docs/source/models.rst | 43 +- hubconf.py | 2 + references/classification/README.md | 6 + references/classification/presets.py | 6 +- references/classification/train.py | 17 +- ...odelTester.test_efficientnet_b0_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b1_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b2_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b3_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b4_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b5_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b6_expect.pkl | Bin 0 -> 939 bytes ...odelTester.test_efficientnet_b7_expect.pkl | Bin 0 -> 939 bytes torchvision/models/__init__.py | 1 + torchvision/models/efficientnet.py | 369 ++++++++++++++++++ torchvision/ops/stochastic_depth.py | 4 +- 16 files changed, 441 insertions(+), 7 deletions(-) create mode 100644 test/expect/ModelTester.test_efficientnet_b0_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b1_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b2_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b3_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b4_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b5_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b6_expect.pkl create mode 100644 test/expect/ModelTester.test_efficientnet_b7_expect.pkl create mode 100644 torchvision/models/efficientnet.py diff --git a/docs/source/models.rst b/docs/source/models.rst index b9bff7a36e8..64ca69f47ae 100644 --- a/docs/source/models.rst +++ b/docs/source/models.rst @@ -27,6 +27,7 @@ architectures for image classification: - `ResNeXt`_ - `Wide ResNet`_ - `MNASNet`_ +- `EfficientNet`_ You can construct a model with random weights by calling its constructor: @@ -47,6 +48,14 @@ You can construct a model with random weights by calling its constructor: resnext50_32x4d = models.resnext50_32x4d() wide_resnet50_2 = models.wide_resnet50_2() mnasnet = models.mnasnet1_0() + efficientnet_b0 = models.efficientnet_b0() + efficientnet_b1 = models.efficientnet_b1() + efficientnet_b2 = models.efficientnet_b2() + efficientnet_b3 = models.efficientnet_b3() + efficientnet_b4 = models.efficientnet_b4() + efficientnet_b5 = models.efficientnet_b5() + efficientnet_b6 = models.efficientnet_b6() + efficientnet_b7 = models.efficientnet_b7() We provide pre-trained models, using the PyTorch :mod:`torch.utils.model_zoo`. 
These can be constructed by passing ``pretrained=True``: @@ -68,6 +77,14 @@ These can be constructed by passing ``pretrained=True``: resnext50_32x4d = models.resnext50_32x4d(pretrained=True) wide_resnet50_2 = models.wide_resnet50_2(pretrained=True) mnasnet = models.mnasnet1_0(pretrained=True) + efficientnet_b0 = models.efficientnet_b0(pretrained=True) + efficientnet_b1 = models.efficientnet_b1(pretrained=True) + efficientnet_b2 = models.efficientnet_b2(pretrained=True) + efficientnet_b3 = models.efficientnet_b3(pretrained=True) + efficientnet_b4 = models.efficientnet_b4(pretrained=True) + efficientnet_b5 = models.efficientnet_b5(pretrained=True) + efficientnet_b6 = models.efficientnet_b6(pretrained=True) + efficientnet_b7 = models.efficientnet_b7(pretrained=True) Instancing a pre-trained model will download its weights to a cache directory. This directory can be set using the `TORCH_MODEL_ZOO` environment variable. See @@ -113,7 +130,10 @@ Unfortunately, the concrete `subset` that was used is lost. For more information see `this discussion `_ or `these experiments `_. -ImageNet 1-crop error rates (224x224) +The sizes of the EfficientNet models depend on the variant. For the exact input sizes +`check here `_ + +ImageNet 1-crop error rates ================================ ============= ============= Model Acc@1 Acc@5 @@ -151,6 +171,14 @@ Wide ResNet-50-2 78.468 94.086 Wide ResNet-101-2 78.848 94.284 MNASNet 1.0 73.456 91.510 MNASNet 0.5 67.734 87.490 +EfficientNet-B0 77.692 93.532 +EfficientNet-B1 78.642 94.186 +EfficientNet-B2 80.608 95.310 +EfficientNet-B3 82.008 96.054 +EfficientNet-B4 83.384 96.594 +EfficientNet-B5 83.444 96.628 +EfficientNet-B6 84.008 96.916 +EfficientNet-B7 84.122 96.908 ================================ ============= ============= @@ -166,6 +194,7 @@ MNASNet 0.5 67.734 87.490 .. _MobileNetV3: https://arxiv.org/abs/1905.02244 .. _ResNeXt: https://arxiv.org/abs/1611.05431 .. _MNASNet: https://arxiv.org/abs/1807.11626 +.. _EfficientNet: https://arxiv.org/abs/1905.11946 .. currentmodule:: torchvision.models @@ -267,6 +296,18 @@ MNASNet .. autofunction:: mnasnet1_0 .. autofunction:: mnasnet1_3 +EfficientNet +------------ + +.. autofunction:: efficientnet_b0 +.. autofunction:: efficientnet_b1 +.. autofunction:: efficientnet_b2 +.. autofunction:: efficientnet_b3 +.. autofunction:: efficientnet_b4 +.. autofunction:: efficientnet_b5 +.. autofunction:: efficientnet_b6 +.. autofunction:: efficientnet_b7 + Quantized Models ---------------- diff --git a/hubconf.py b/hubconf.py index 097759bdd89..2bff6850525 100644 --- a/hubconf.py +++ b/hubconf.py @@ -15,6 +15,8 @@ from torchvision.models.mobilenetv3 import mobilenet_v3_large, mobilenet_v3_small from torchvision.models.mnasnet import mnasnet0_5, mnasnet0_75, mnasnet1_0, \ mnasnet1_3 +from torchvision.models.efficientnet import efficientnet_b0, efficientnet_b1, efficientnet_b2, \ + efficientnet_b3, efficientnet_b4, efficientnet_b5, efficientnet_b6, efficientnet_b7 # segmentation from torchvision.models.segmentation import fcn_resnet50, fcn_resnet101, \ diff --git a/references/classification/README.md b/references/classification/README.md index e0b7f210175..210a63c0bca 100644 --- a/references/classification/README.md +++ b/references/classification/README.md @@ -68,6 +68,12 @@ Then we averaged the parameters of the last 3 checkpoints that improved the Acc@ and [#3354](https://github.com/pytorch/vision/pull/3354) for details. 
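The checkpoint averaging mentioned above is an element-wise mean over the saved weights. A minimal sketch, assuming each checkpoint file holds a plain model state_dict (the file names below are hypothetical):

```python
import torch

# Hypothetical paths to the last 3 checkpoints that improved Acc@1.
checkpoint_paths = ["model_58.pth", "model_61.pth", "model_63.pth"]
state_dicts = [torch.load(path, map_location="cpu") for path in checkpoint_paths]

averaged = {}
for key, value in state_dicts[0].items():
    if value.is_floating_point():
        # Element-wise mean of the parameter/buffer across checkpoints.
        averaged[key] = torch.stack([sd[key] for sd in state_dicts]).mean(dim=0)
    else:
        # Integer buffers (e.g. BatchNorm's num_batches_tracked) are copied as-is.
        averaged[key] = value.clone()

torch.save(averaged, "mobilenet_v3_large_averaged.pth")
```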
+### EfficientNet + +The weights of the B0-B4 variants are ported from Ross Wightman's [timm repo](https://github.com/rwightman/pytorch-image-models/blob/01cb46a9a50e3ba4be167965b5764e9702f09b30/timm/models/efficientnet.py#L95-L108). + +The weights of the B5-B7 variants are ported from Luke Melas' [EfficientNet-PyTorch repo](https://github.com/lukemelas/EfficientNet-PyTorch/blob/1039e009545d9329ea026c9f7541341439712b96/efficientnet_pytorch/utils.py#L562-L564). + ## Mixed precision training Automatic Mixed Precision (AMP) training on GPU for Pytorch can be enabled with the [NVIDIA Apex extension](https://github.com/NVIDIA/apex). diff --git a/references/classification/presets.py b/references/classification/presets.py index 6bb389ba8db..ce5a6fe414f 100644 --- a/references/classification/presets.py +++ b/references/classification/presets.py @@ -1,4 +1,5 @@ from torchvision.transforms import autoaugment, transforms +from torchvision.transforms.functional import InterpolationMode class ClassificationPresetTrain: @@ -24,10 +25,11 @@ def __call__(self, img): class ClassificationPresetEval: - def __init__(self, crop_size, resize_size=256, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)): + def __init__(self, crop_size, resize_size=256, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), + interpolation=InterpolationMode.BILINEAR): self.transforms = transforms.Compose([ - transforms.Resize(resize_size), + transforms.Resize(resize_size, interpolation=interpolation), transforms.CenterCrop(crop_size), transforms.ToTensor(), transforms.Normalize(mean=mean, std=std), diff --git a/references/classification/train.py b/references/classification/train.py index b4e9d274662..9ba99b3dc54 100644 --- a/references/classification/train.py +++ b/references/classification/train.py @@ -6,6 +6,7 @@ import torch.utils.data from torch import nn import torchvision +from torchvision.transforms.functional import InterpolationMode import presets import utils @@ -82,7 +83,18 @@ def _get_cache_path(filepath): def load_data(traindir, valdir, args): # Data loading code print("Loading data") - resize_size, crop_size = (342, 299) if args.model == 'inception_v3' else (256, 224) + resize_size, crop_size = 256, 224 + interpolation = InterpolationMode.BILINEAR + if args.model == 'inception_v3': + resize_size, crop_size = 342, 299 + elif args.model.startswith('efficientnet_'): + sizes = { + 'b0': (256, 224), 'b1': (256, 240), 'b2': (288, 288), 'b3': (320, 300), + 'b4': (384, 380), 'b5': (456, 456), 'b6': (528, 528), 'b7': (600, 600), + } + e_type = args.model.replace('efficientnet_', '') + resize_size, crop_size = sizes[e_type] + interpolation = InterpolationMode.BICUBIC print("Loading training data") st = time.time() @@ -113,7 +125,8 @@ def load_data(traindir, valdir, args): else: dataset_test = torchvision.datasets.ImageFolder( valdir, - presets.ClassificationPresetEval(crop_size=crop_size, resize_size=resize_size)) + presets.ClassificationPresetEval(crop_size=crop_size, resize_size=resize_size, + interpolation=interpolation)) if args.cache_dataset: print("Saving dataset_test to {}".format(cache_path)) utils.mkdir(os.path.dirname(cache_path)) diff --git a/test/expect/ModelTester.test_efficientnet_b0_expect.pkl b/test/expect/ModelTester.test_efficientnet_b0_expect.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1de871ce0fbea9ddbab7e315b05f864bc5f6fa53 GIT binary patch literal 939 zcmWIWW@cev;NW1u00Im`42ea_8JT6N`YDMeiFyUuIc`pT3{fbcfhoBpAE-(%zO*DW zr zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L 
z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w z{zUOK5-018ua*Pfs^#xPFi_; zv9K0Hr_XYs!)hxHiiB76xiM&!7+hHqaUyty&u#rxmqYZm9FCn>e!TV0N{8tctD22Y zto-_HtET<$?;8KNtkLvZ8@#Hby-)M{2L`S7kJnb}?onTL4 z(nWmJ3Z2()R)9jw@aV*QCx9UZ!ni|=pTQa)T4kw4#lTo_b229~xR62)!ZhXr*?e(c zdMFdnRuB&GW&~02G>IIC0w4(#fSy9pbtC(U4@KuIAP-r$z5%*kWLNQ{=#>Dv5T+Lz z1_9n|Y&uXya?HAL<)Fk20x)_zgv&4q>`9P!*+6-N!4s+glnDa7S=m5h%s>cI4^ayM DfM5D$ literal 0 HcmV?d00001 diff --git a/test/expect/ModelTester.test_efficientnet_b1_expect.pkl b/test/expect/ModelTester.test_efficientnet_b1_expect.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1499a97028eef11527829f13476152d00d8cb90b GIT binary patch literal 939 zcmWIWW@cev;NW1u00Im`42ea_8JT6N`YDMeiFyUuIc`pT3{fbcfhoBpAE-(%zO*DW zr zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w z{zUOK5~t%{$I3Y`E@{3?+`dBT8o%a@g$Aqq3Jo-$FwI0(K3b>rdNr2iX=piAm1#2Z zO;|bi!lIQ1zr8e9xlCI1lOs**LQgMRB!L3ZQz*J_WIyqt=zIm_A?wyRK-Y`xDt;8b5 zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w z{zUOK66alKk>+9fY|Tr8yERh|pIVuo7OWY`RI%#8jfLuN4~jJfR_AL9?)tFuz?+TFl9&C0F@kMb};5l`hRjTFp}$S6(SPu~KL9%oRO1ertT=vDW&>x@zS- zMK-O2#~*8U{L$805UjpjcEj!!cM1+_?7Q@9rNm2RtqNw|RX6lIRtn1XuH3Qi#R`Y1 zdo^FR-d-8P>!NXi?fc3_-Qrq2Yx^~L>v>j9KiRdCF_%NjC8b?6d+Prc;q5Y7Hp`f_ z!l!nv(0wPN1q!XA=X2do07D9dafcQ^gEc&~%2JDpfwAD`WKLvoA%z@-Y0L$(`Qp6v zP$r zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w z{zUOK5-0rg$`xua*)#*ASv8-Wj$84~;r;v-qC8VI zf4EOtIawlHlOa8Dg>ql_$|F*b)x+)_Tj@}_QMGFFL`^>tDa~m*3M*Nj`L6u6KWZh1 z1FyQvf>|rGo`q`Op;eYzR1Ay-Hz#ul>i!MRpZGie3qz3t@Vp zVG!WW#-;;RB*&}^R}M zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w z{zUOK633uVe8mUTA*RQ;o@?ggxAaD)Qw!yl z2j1LJx9*Zz@kNS9qr2zmiZ!=iEN8jVptiPRnZ|@YTQqDQhHA`;3fE9kTCAbdE3m># z@uCL9zWvHBdOJ0^vQ$@24Sb<-|B=ATcia)n?ZXn5H>~ARx4t%U1sDIN70%z6t*}tL zqp|4nD)koT^c8z2{`hR zH_^`J&(bwCK%pi0=k$~lz>orA+@ZzKU=0tgvecqtU@W*fnG+dYNFfJd8gqeczBn&E zlnH1n2nTpGf+%>JM2 zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w z{zUOK66d#r>56sJ&S*R+oxft`o>?nReQ#-K#dT;*IU=KeY{@2#7pI#ww64TzG)1bf z^pM}Dp>rTo!^2rr^H|I_js5X&HN4ELG<4DfS6GT4*9g)-t)%}h80Eu$Cvwv zwP|R~ch&qqalQsygNw$WmjW7mO*hrE-gK^vdT?#U-;c935{g?_$af`Z*ce|~5%k$r z-LCAE#w_*^E9zhAYGi2osO8;`(>!x>-txTXZki38Jj*|prmDYLb!Yj>y>m2nttwq{ z+;zLgtLNvJgF;Iuqqg(}Fr+{jcWCi5Si?iBEVZZ@7z=Jr=0pY;QpiD=##|tqFV0I3 zWdhm?!U5ilAPSx)k>gMRB!L3ZQz*J_WIyqt=zIm_A?wyRK-Y`xDt;8b5 zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w z{zUOK5=Y_3bG6o+A}hY|ny%RLJyzo%i<|~m{wj?#m%l8%aDD%ZY4XbID^Cfom=ir= z#j2_Q)DQGjFWcTCydp%~Tw~AIIQ6={tr{YV50}j|x}x!JN9Br)Fq0Kk-@DZ_FD_qR zah`w0AFaL>3_2fFzj4|wf04Lo>8fuR)Z64HtMja!w0wc?k!7u+5$ZApEz93eb6>u4 z?ph7Y-)S0OANDOTtYy@Y$kbVWHT;@-z=^`;NzZ%K`3@ghv7#?iL+^R^a*vgB)fNcE zsa|$opwV#n#u89yHO8NHI{^$S5XK!^{0!Fc&?-wUDh9@ao0B<_!G#oZ5T-E~$mWal z(nFbmwt{egHzSCGr%B{E6aYz}0Q3}!t{d4;d?-3!0eQ%}^$pPVBD;zoMXv zf)S|3ppZF&8AvA=loqmh8ZvUemW=jY_4CYNO9=M{7L z7p0^YrKY%KCYNv(a%ct>a+VZw1r>7Z1$eV_Fj*X^nFTZrgadH;l#f9R#i#lPZcb`w z{zUOK66f2#>&qwI;$Lpnv`Ia40_zH`Wabr~)w5T$-CTQ5p>vS#q@6>8VfGesIDp9ygcE?UX46s|K;Jf*2^n4GcJ3PsHFa-`t~xB zIg;uTPBm&(3TM=K!UWXW&7Ll^^4zVyW&ieNr#a6oUo_Qkg=OWh Tensor: + scale = F.adaptive_avg_pool2d(input, 1) + scale = self.fc1(scale) + scale = F.silu(scale, inplace=True) + scale = self.fc2(scale) + return 
scale.sigmoid() + + def forward(self, input: Tensor) -> Tensor: + scale = self._scale(input) + return scale * input + + +class MBConvConfig: + # Stores information listed at Table 1 of the EfficientNet paper + def __init__(self, + expand_ratio: float, kernel: int, stride: int, + input_channels: int, out_channels: int, num_layers: int, + width_mult: float, depth_mult: float) -> None: + self.expand_ratio = expand_ratio + self.kernel = kernel + self.stride = stride + self.input_channels = self.adjust_channels(input_channels, width_mult) + self.out_channels = self.adjust_channels(out_channels, width_mult) + self.num_layers = self.adjust_depth(num_layers, depth_mult) + + def __repr__(self) -> str: + s = self.__class__.__name__ + '(' + s += 'expand_ratio={expand_ratio}' + s += ', kernel={kernel}' + s += ', stride={stride}' + s += ', input_channels={input_channels}' + s += ', out_channels={out_channels}' + s += ', num_layers={num_layers}' + s += ')' + return s.format(**self.__dict__) + + @staticmethod + def adjust_channels(channels: int, width_mult: float, min_value: Optional[int] = None) -> int: + return _make_divisible(channels * width_mult, 8, min_value) + + @staticmethod + def adjust_depth(num_layers: int, depth_mult: float): + return int(math.ceil(num_layers * depth_mult)) + + +class MBConv(nn.Module): + def __init__(self, cnf: MBConvConfig, stochastic_depth_prob: float, norm_layer: Callable[..., nn.Module], + se_layer: Callable[..., nn.Module] = SqueezeExcitation) -> None: + super().__init__() + + if not (1 <= cnf.stride <= 2): + raise ValueError('illegal stride value') + + self.use_res_connect = cnf.stride == 1 and cnf.input_channels == cnf.out_channels + + layers: List[nn.Module] = [] + activation_layer = nn.SiLU + + # expand + expanded_channels = cnf.adjust_channels(cnf.input_channels, cnf.expand_ratio) + if expanded_channels != cnf.input_channels: + layers.append(ConvBNActivation(cnf.input_channels, expanded_channels, kernel_size=1, + norm_layer=norm_layer, activation_layer=activation_layer)) + + # depthwise + layers.append(ConvBNActivation(expanded_channels, expanded_channels, kernel_size=cnf.kernel, + stride=cnf.stride, groups=expanded_channels, + norm_layer=norm_layer, activation_layer=activation_layer)) + + # squeeze and excitation + squeeze_channels = max(1, cnf.input_channels // 4) + layers.append(se_layer(expanded_channels, squeeze_channels)) + + # project + layers.append(ConvBNActivation(expanded_channels, cnf.out_channels, kernel_size=1, norm_layer=norm_layer, + activation_layer=nn.Identity)) + + self.block = nn.Sequential(*layers) + self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row") + self.out_channels = cnf.out_channels + + def forward(self, input: Tensor) -> Tensor: + result = self.block(input) + if self.use_res_connect: + result = self.stochastic_depth(result) + result += input + return result + + +class EfficientNet(nn.Module): + def __init__( + self, + inverted_residual_setting: List[MBConvConfig], + dropout: float, + stochastic_depth_prob: float = 0.2, + num_classes: int = 1000, + block: Optional[Callable[..., nn.Module]] = None, + norm_layer: Optional[Callable[..., nn.Module]] = None, + **kwargs: Any + ) -> None: + """ + EfficientNet main class + + Args: + inverted_residual_setting (List[MBConvConfig]): Network structure + dropout (float): The droupout probability + stochastic_depth_prob (float): The stochastic depth probability + num_classes (int): Number of classes + block (Optional[Callable[..., nn.Module]]): Module specifying inverted residual 
building block for mobilenet + norm_layer (Optional[Callable[..., nn.Module]]): Module specifying the normalization layer to use + """ + super().__init__() + + if not inverted_residual_setting: + raise ValueError("The inverted_residual_setting should not be empty") + elif not (isinstance(inverted_residual_setting, Sequence) and + all([isinstance(s, MBConvConfig) for s in inverted_residual_setting])): + raise TypeError("The inverted_residual_setting should be List[MBConvConfig]") + + if block is None: + block = MBConv + + if norm_layer is None: + norm_layer = nn.BatchNorm2d + + layers: List[nn.Module] = [] + + # building first layer + firstconv_output_channels = inverted_residual_setting[0].input_channels + layers.append(ConvBNActivation(3, firstconv_output_channels, kernel_size=3, stride=2, norm_layer=norm_layer, + activation_layer=nn.SiLU)) + + # building inverted residual blocks + total_stage_blocks = sum([cnf.num_layers for cnf in inverted_residual_setting]) + stage_block_id = 0 + for cnf in inverted_residual_setting: + stage: List[nn.Module] = [] + for _ in range(cnf.num_layers): + # copy to avoid modifications. shallow copy is enough + block_cnf = copy.copy(cnf) + + # overwrite info if not the first conv in the stage + if stage: + block_cnf.input_channels = block_cnf.out_channels + block_cnf.stride = 1 + + # adjust stochastic depth probability based on the depth of the stage block + sd_prob = stochastic_depth_prob * float(stage_block_id) / total_stage_blocks + + stage.append(block(block_cnf, sd_prob, norm_layer)) + stage_block_id += 1 + + layers.append(nn.Sequential(*stage)) + + # building last several layers + lastconv_input_channels = inverted_residual_setting[-1].out_channels + lastconv_output_channels = 4 * lastconv_input_channels + layers.append(ConvBNActivation(lastconv_input_channels, lastconv_output_channels, kernel_size=1, + norm_layer=norm_layer, activation_layer=nn.SiLU)) + + self.features = nn.Sequential(*layers) + self.avgpool = nn.AdaptiveAvgPool2d(1) + self.classifier = nn.Sequential( + nn.Dropout(p=dropout, inplace=True), + nn.Linear(lastconv_output_channels, num_classes), + ) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + init_range = 1.0 / math.sqrt(m.out_features) + nn.init.uniform_(m.weight, -init_range, init_range) + nn.init.zeros_(m.bias) + + def _forward_impl(self, x: Tensor) -> Tensor: + x = self.features(x) + + x = self.avgpool(x) + x = torch.flatten(x, 1) + + x = self.classifier(x) + + return x + + def forward(self, x: Tensor) -> Tensor: + return self._forward_impl(x) + + +def _efficientnet_conf(width_mult: float, depth_mult: float, **kwargs: Any) -> List[MBConvConfig]: + bneck_conf = partial(MBConvConfig, width_mult=width_mult, depth_mult=depth_mult) + inverted_residual_setting = [ + bneck_conf(1, 3, 1, 32, 16, 1), + bneck_conf(6, 3, 2, 16, 24, 2), + bneck_conf(6, 5, 2, 24, 40, 2), + bneck_conf(6, 3, 2, 40, 80, 3), + bneck_conf(6, 5, 1, 80, 112, 3), + bneck_conf(6, 5, 2, 112, 192, 4), + bneck_conf(6, 3, 1, 192, 320, 1), + ] + return inverted_residual_setting + + +def _efficientnet_model( + arch: str, + inverted_residual_setting: List[MBConvConfig], + dropout: float, + pretrained: bool, + progress: bool, + **kwargs: Any +) -> EfficientNet: + model = EfficientNet(inverted_residual_setting, dropout, **kwargs) + 
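For a rough sense of what the width/depth multipliers above do to the base configuration, a self-contained sketch follows; it assumes the usual MobileNet-style channel rounding (the real code delegates to the shared `_make_divisible` helper, so exact values may differ slightly):

```python
import math

def make_divisible(v: float, divisor: int = 8) -> int:
    # Assumed rounding rule: snap to the nearest multiple of `divisor`,
    # but never drop below 90% of the original value.
    new_v = max(divisor, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

def adjust_channels(channels: int, width_mult: float) -> int:
    return make_divisible(channels * width_mult)

def adjust_depth(num_layers: int, depth_mult: float) -> int:
    return int(math.ceil(num_layers * depth_mult))

# efficientnet_b3 is built with width_mult=1.2, depth_mult=1.4 (see the builders below).
for in_c, out_c, layers in [(32, 16, 1), (16, 24, 2), (24, 40, 2)]:
    print(adjust_channels(in_c, 1.2), adjust_channels(out_c, 1.2), adjust_depth(layers, 1.4))
# 40 24 2
# 24 32 3
# 32 48 3
```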
if pretrained: + if model_urls.get(arch, None) is None: + raise ValueError("No checkpoint is available for model type {}".format(arch)) + state_dict = load_state_dict_from_url(model_urls[arch], progress=progress) + model.load_state_dict(state_dict) + return model + + +def efficientnet_b0(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: + """ + Constructs a EfficientNet B0 architecture from + `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + inverted_residual_setting = _efficientnet_conf(width_mult=1.0, depth_mult=1.0, **kwargs) + return _efficientnet_model("efficientnet_b0", inverted_residual_setting, 0.2, pretrained, progress, **kwargs) + + +def efficientnet_b1(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: + """ + Constructs a EfficientNet B1 architecture from + `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + inverted_residual_setting = _efficientnet_conf(width_mult=1.0, depth_mult=1.1, **kwargs) + return _efficientnet_model("efficientnet_b1", inverted_residual_setting, 0.2, pretrained, progress, **kwargs) + + +def efficientnet_b2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: + """ + Constructs a EfficientNet B2 architecture from + `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + inverted_residual_setting = _efficientnet_conf(width_mult=1.1, depth_mult=1.2, **kwargs) + return _efficientnet_model("efficientnet_b2", inverted_residual_setting, 0.3, pretrained, progress, **kwargs) + + +def efficientnet_b3(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: + """ + Constructs a EfficientNet B3 architecture from + `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + inverted_residual_setting = _efficientnet_conf(width_mult=1.2, depth_mult=1.4, **kwargs) + return _efficientnet_model("efficientnet_b3", inverted_residual_setting, 0.3, pretrained, progress, **kwargs) + + +def efficientnet_b4(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: + """ + Constructs a EfficientNet B4 architecture from + `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" `_. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + inverted_residual_setting = _efficientnet_conf(width_mult=1.4, depth_mult=1.8, **kwargs) + return _efficientnet_model("efficientnet_b4", inverted_residual_setting, 0.4, pretrained, progress, **kwargs) + + +def efficientnet_b5(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: + """ + Constructs a EfficientNet B5 architecture from + `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + inverted_residual_setting = _efficientnet_conf(width_mult=1.6, depth_mult=2.2, **kwargs) + return _efficientnet_model("efficientnet_b5", inverted_residual_setting, 0.4, pretrained, progress, + norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), **kwargs) + + +def efficientnet_b6(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: + """ + Constructs a EfficientNet B6 architecture from + `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + inverted_residual_setting = _efficientnet_conf(width_mult=1.8, depth_mult=2.6, **kwargs) + return _efficientnet_model("efficientnet_b6", inverted_residual_setting, 0.5, pretrained, progress, + norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), **kwargs) + + +def efficientnet_b7(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> EfficientNet: + """ + Constructs a EfficientNet B7 architecture from + `"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + inverted_residual_setting = _efficientnet_conf(width_mult=2.0, depth_mult=3.1, **kwargs) + return _efficientnet_model("efficientnet_b7", inverted_residual_setting, 0.5, pretrained, progress, + norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01), **kwargs) diff --git a/torchvision/ops/stochastic_depth.py b/torchvision/ops/stochastic_depth.py index f3338242a76..0b95e7cca67 100644 --- a/torchvision/ops/stochastic_depth.py +++ b/torchvision/ops/stochastic_depth.py @@ -22,12 +22,12 @@ def stochastic_depth(input: Tensor, p: float, mode: str, training: bool = True) """ if p < 0.0 or p > 1.0: raise ValueError("drop probability has to be between 0 and 1, but got {}".format(p)) + if mode not in ["batch", "row"]: + raise ValueError("mode has to be either 'batch' or 'row', but got {}".format(mode)) if not training or p == 0.0: return input survival_rate = 1.0 - p - if mode not in ["batch", "row"]: - raise ValueError("mode has to be either 'batch' or 'row', but got {}".format(mode)) size = [1] * input.ndim if mode == "row": size[0] = input.shape[0] From 96f6e0a117d5c56f7e0237851dbb96144ebb110b Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Thu, 26 Aug 2021 13:58:09 +0100 Subject: [PATCH 10/12] Make get_image_size and get_image_num_channels public. 
(#4321) --- references/detection/transforms.py | 8 ++--- test/test_functional_tensor.py | 20 ++++++++++- torchvision/transforms/autoaugment.py | 6 ++-- torchvision/transforms/functional.py | 38 ++++++++++++++------- torchvision/transforms/functional_pil.py | 6 ++-- torchvision/transforms/functional_tensor.py | 14 ++++---- torchvision/transforms/transforms.py | 18 +++++----- 7 files changed, 70 insertions(+), 40 deletions(-) diff --git a/references/detection/transforms.py b/references/detection/transforms.py index 8e4b8870eaf..2d50adb1295 100644 --- a/references/detection/transforms.py +++ b/references/detection/transforms.py @@ -33,7 +33,7 @@ def forward(self, image: Tensor, if torch.rand(1) < self.p: image = F.hflip(image) if target is not None: - width, _ = F._get_image_size(image) + width, _ = F.get_image_size(image) target["boxes"][:, [0, 2]] = width - target["boxes"][:, [2, 0]] if "masks" in target: target["masks"] = target["masks"].flip(-1) @@ -76,7 +76,7 @@ def forward(self, image: Tensor, elif image.ndimension() == 2: image = image.unsqueeze(0) - orig_w, orig_h = F._get_image_size(image) + orig_w, orig_h = F.get_image_size(image) while True: # sample an option @@ -157,7 +157,7 @@ def forward(self, image: Tensor, if torch.rand(1) < self.p: return image, target - orig_w, orig_h = F._get_image_size(image) + orig_w, orig_h = F.get_image_size(image) r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0]) canvas_width = int(orig_w * r) @@ -226,7 +226,7 @@ def forward(self, image: Tensor, image = self._contrast(image) if r[6] < self.p: - channels = F._get_image_num_channels(image) + channels = F.get_image_num_channels(image) permutation = torch.randperm(channels) is_pil = F._is_pil_image(image) diff --git a/test/test_functional_tensor.py b/test/test_functional_tensor.py index 30ee144888c..717e2a7cb33 100644 --- a/test/test_functional_tensor.py +++ b/test/test_functional_tensor.py @@ -31,6 +31,24 @@ NEAREST, BILINEAR, BICUBIC = InterpolationMode.NEAREST, InterpolationMode.BILINEAR, InterpolationMode.BICUBIC +@pytest.mark.parametrize('device', cpu_and_gpu()) +@pytest.mark.parametrize('fn', [F.get_image_size, F.get_image_num_channels]) +def test_image_sizes(device, fn): + script_F = torch.jit.script(fn) + + img_tensor, pil_img = _create_data(16, 18, 3, device=device) + value_img = fn(img_tensor) + value_pil_img = fn(pil_img) + assert value_img == value_pil_img + + value_img_script = script_F(img_tensor) + assert value_img == value_img_script + + batch_tensors = _create_data_batch(16, 18, 3, num_samples=4, device=device) + value_img_batch = fn(batch_tensors) + assert value_img == value_img_batch + + @needs_cuda def test_scale_channel(): """Make sure that _scale_channel gives the same results on CPU and GPU as @@ -908,7 +926,7 @@ def test_resized_crop(device, mode): @pytest.mark.parametrize('device', cpu_and_gpu()) @pytest.mark.parametrize('func, args', [ - (F_t._get_image_size, ()), (F_t.vflip, ()), + (F_t.get_image_size, ()), (F_t.vflip, ()), (F_t.hflip, ()), (F_t.crop, (1, 2, 4, 5)), (F_t.adjust_brightness, (0., )), (F_t.adjust_contrast, (1., )), (F_t.adjust_hue, (-0.5, )), (F_t.adjust_saturation, (2., )), diff --git a/torchvision/transforms/autoaugment.py b/torchvision/transforms/autoaugment.py index 3b6c927a4eb..e241b821871 100644 --- a/torchvision/transforms/autoaugment.py +++ b/torchvision/transforms/autoaugment.py @@ -188,7 +188,7 @@ def forward(self, img: Tensor) -> Tensor: fill = self.fill if isinstance(img, Tensor): if isinstance(fill, (int, float)): - 
fill = [float(fill)] * F._get_image_num_channels(img) + fill = [float(fill)] * F.get_image_num_channels(img) elif fill is not None: fill = [float(f) for f in fill] @@ -209,10 +209,10 @@ def forward(self, img: Tensor) -> Tensor: img = F.affine(img, angle=0.0, translate=[0, 0], scale=1.0, shear=[0.0, math.degrees(magnitude)], interpolation=self.interpolation, fill=fill) elif op_name == "TranslateX": - img = F.affine(img, angle=0.0, translate=[int(F._get_image_size(img)[0] * magnitude), 0], scale=1.0, + img = F.affine(img, angle=0.0, translate=[int(F.get_image_size(img)[0] * magnitude), 0], scale=1.0, interpolation=self.interpolation, shear=[0.0, 0.0], fill=fill) elif op_name == "TranslateY": - img = F.affine(img, angle=0.0, translate=[0, int(F._get_image_size(img)[1] * magnitude)], scale=1.0, + img = F.affine(img, angle=0.0, translate=[0, int(F.get_image_size(img)[1] * magnitude)], scale=1.0, interpolation=self.interpolation, shear=[0.0, 0.0], fill=fill) elif op_name == "Rotate": img = F.rotate(img, magnitude, interpolation=self.interpolation, fill=fill) diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index dc3a9f8f68b..679631a971e 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -58,22 +58,34 @@ def _interpolation_modes_from_int(i: int) -> InterpolationMode: _is_pil_image = F_pil._is_pil_image -def _get_image_size(img: Tensor) -> List[int]: - """Returns image size as [w, h] +def get_image_size(img: Tensor) -> List[int]: + """Returns the size of an image as [width, height]. + + Args: + img (PIL Image or Tensor): The image to be checked. + + Returns: + List[int]: The image size. """ if isinstance(img, torch.Tensor): - return F_t._get_image_size(img) + return F_t.get_image_size(img) - return F_pil._get_image_size(img) + return F_pil.get_image_size(img) -def _get_image_num_channels(img: Tensor) -> int: - """Returns number of image channels +def get_image_num_channels(img: Tensor) -> int: + """Returns the number of channels of an image. + + Args: + img (PIL Image or Tensor): The image to be checked. + + Returns: + int: The number of channels. 
""" if isinstance(img, torch.Tensor): - return F_t._get_image_num_channels(img) + return F_t.get_image_num_channels(img) - return F_pil._get_image_num_channels(img) + return F_pil.get_image_num_channels(img) @torch.jit.unused @@ -500,7 +512,7 @@ def center_crop(img: Tensor, output_size: List[int]) -> Tensor: elif isinstance(output_size, (tuple, list)) and len(output_size) == 1: output_size = (output_size[0], output_size[0]) - image_width, image_height = _get_image_size(img) + image_width, image_height = get_image_size(img) crop_height, crop_width = output_size if crop_width > image_width or crop_height > image_height: @@ -511,7 +523,7 @@ def center_crop(img: Tensor, output_size: List[int]) -> Tensor: (crop_height - image_height + 1) // 2 if crop_height > image_height else 0, ] img = pad(img, padding_ltrb, fill=0) # PIL uses fill value 0 - image_width, image_height = _get_image_size(img) + image_width, image_height = get_image_size(img) if crop_width == image_width and crop_height == image_height: return img @@ -696,7 +708,7 @@ def five_crop(img: Tensor, size: List[int]) -> Tuple[Tensor, Tensor, Tensor, Ten if len(size) != 2: raise ValueError("Please provide only two dimensions (h, w) for size.") - image_width, image_height = _get_image_size(img) + image_width, image_height = get_image_size(img) crop_height, crop_width = size if crop_width > image_width or crop_height > image_height: msg = "Requested crop size {} is bigger than input size {}" @@ -993,7 +1005,7 @@ def rotate( center_f = [0.0, 0.0] if center is not None: - img_size = _get_image_size(img) + img_size = get_image_size(img) # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center. center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, img_size)] @@ -1094,7 +1106,7 @@ def affine( if len(shear) != 2: raise ValueError("Shear should be a sequence containing two values. 
Got {}".format(shear)) - img_size = _get_image_size(img) + img_size = get_image_size(img) if not isinstance(img, torch.Tensor): # center = (img_size[0] * 0.5 + 0.5, img_size[1] * 0.5 + 0.5) # it is visually better to estimate the center without 0.5 offset diff --git a/torchvision/transforms/functional_pil.py b/torchvision/transforms/functional_pil.py index 0f3d1d87bb4..67f4ff4bb33 100644 --- a/torchvision/transforms/functional_pil.py +++ b/torchvision/transforms/functional_pil.py @@ -20,14 +20,14 @@ def _is_pil_image(img: Any) -> bool: @torch.jit.unused -def _get_image_size(img: Any) -> List[int]: +def get_image_size(img: Any) -> List[int]: if _is_pil_image(img): - return img.size + return list(img.size) raise TypeError("Unexpected type {}".format(type(img))) @torch.jit.unused -def _get_image_num_channels(img: Any) -> int: +def get_image_num_channels(img: Any) -> int: if _is_pil_image(img): return 1 if img.mode == 'L' else 3 raise TypeError("Unexpected type {}".format(type(img))) diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py index 3e4069bb0c0..61c07433cb6 100644 --- a/torchvision/transforms/functional_tensor.py +++ b/torchvision/transforms/functional_tensor.py @@ -16,13 +16,13 @@ def _assert_image_tensor(img: Tensor) -> None: raise TypeError("Tensor is not a torch image.") -def _get_image_size(img: Tensor) -> List[int]: +def get_image_size(img: Tensor) -> List[int]: # Returns (w, h) of tensor image _assert_image_tensor(img) return [img.shape[-1], img.shape[-2]] -def _get_image_num_channels(img: Tensor) -> int: +def get_image_num_channels(img: Tensor) -> int: if img.ndim == 2: return 1 elif img.ndim > 2: @@ -50,7 +50,7 @@ def _max_value(dtype: torch.dtype) -> float: def _assert_channels(img: Tensor, permitted: List[int]) -> None: - c = _get_image_num_channels(img) + c = get_image_num_channels(img) if c not in permitted: raise TypeError("Input image tensor permitted channel values are {}, but found {}".format(permitted, c)) @@ -122,7 +122,7 @@ def hflip(img: Tensor) -> Tensor: def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor: _assert_image_tensor(img) - w, h = _get_image_size(img) + w, h = get_image_size(img) right = left + width bottom = top + height @@ -187,7 +187,7 @@ def adjust_hue(img: Tensor, hue_factor: float) -> Tensor: _assert_image_tensor(img) _assert_channels(img, [1, 3]) - if _get_image_num_channels(img) == 1: # Match PIL behaviour + if get_image_num_channels(img) == 1: # Match PIL behaviour return img orig_dtype = img.dtype @@ -513,7 +513,7 @@ def resize( if antialias and interpolation not in ["bilinear", "bicubic"]: raise ValueError("Antialias option is supported for bilinear and bicubic interpolation modes only") - w, h = _get_image_size(img) + w, h = get_image_size(img) if isinstance(size, int) or len(size) == 1: # specified size only for the smallest edge short, long = (w, h) if w <= h else (h, w) @@ -586,7 +586,7 @@ def _assert_grid_transform_inputs( warnings.warn("Argument fill should be either int, float, tuple or list") # Check fill - num_channels = _get_image_num_channels(img) + num_channels = get_image_num_channels(img) if isinstance(fill, (tuple, list)) and (len(fill) > 1 and len(fill) != num_channels): msg = ("The number of elements in 'fill' cannot broadcast to match the number of " "channels of the image ({} != {})") diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py index 954d5f5f064..a5582b20a45 100644 --- 
a/torchvision/transforms/transforms.py +++ b/torchvision/transforms/transforms.py @@ -575,7 +575,7 @@ def get_params(img: Tensor, output_size: Tuple[int, int]) -> Tuple[int, int, int Returns: tuple: params (i, j, h, w) to be passed to ``crop`` for random crop. """ - w, h = F._get_image_size(img) + w, h = F.get_image_size(img) th, tw = output_size if h + 1 < th or w + 1 < tw: @@ -613,7 +613,7 @@ def forward(self, img): if self.padding is not None: img = F.pad(img, self.padding, self.fill, self.padding_mode) - width, height = F._get_image_size(img) + width, height = F.get_image_size(img) # pad the width if needed if self.pad_if_needed and width < self.size[1]: padding = [self.size[1] - width, 0] @@ -742,12 +742,12 @@ def forward(self, img): fill = self.fill if isinstance(img, Tensor): if isinstance(fill, (int, float)): - fill = [float(fill)] * F._get_image_num_channels(img) + fill = [float(fill)] * F.get_image_num_channels(img) else: fill = [float(f) for f in fill] if torch.rand(1) < self.p: - width, height = F._get_image_size(img) + width, height = F.get_image_size(img) startpoints, endpoints = self.get_params(width, height, self.distortion_scale) return F.perspective(img, startpoints, endpoints, self.interpolation, fill) return img @@ -858,7 +858,7 @@ def get_params( tuple: params (i, j, h, w) to be passed to ``crop`` for a random sized crop. """ - width, height = F._get_image_size(img) + width, height = F.get_image_size(img) area = height * width log_ratio = torch.log(torch.tensor(ratio)) @@ -1280,7 +1280,7 @@ def forward(self, img): fill = self.fill if isinstance(img, Tensor): if isinstance(fill, (int, float)): - fill = [float(fill)] * F._get_image_num_channels(img) + fill = [float(fill)] * F.get_image_num_channels(img) else: fill = [float(f) for f in fill] angle = self.get_params(self.degrees) @@ -1439,11 +1439,11 @@ def forward(self, img): fill = self.fill if isinstance(img, Tensor): if isinstance(fill, (int, float)): - fill = [float(fill)] * F._get_image_num_channels(img) + fill = [float(fill)] * F.get_image_num_channels(img) else: fill = [float(f) for f in fill] - img_size = F._get_image_size(img) + img_size = F.get_image_size(img) ret = self.get_params(self.degrees, self.translate, self.scale, self.shear, img_size) @@ -1529,7 +1529,7 @@ def forward(self, img): Returns: PIL Image or Tensor: Randomly grayscaled image. """ - num_output_channels = F._get_image_num_channels(img) + num_output_channels = F.get_image_num_channels(img) if torch.rand(1) < self.p: return F.rgb_to_grayscale(img, num_output_channels=num_output_channels) return img From a3d9c416c70ca9fe645678675a46dc0bfe6c5556 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Fri, 27 Aug 2021 09:50:48 +0100 Subject: [PATCH 11/12] Do not disable profiling executor in ONNX tests (#4324) [ghstack-poisoned] Co-authored-by: Bert Maher --- test/test_onnx.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/test/test_onnx.py b/test/test_onnx.py index ce0bc5c7b97..cd3239cef16 100644 --- a/test/test_onnx.py +++ b/test/test_onnx.py @@ -397,10 +397,6 @@ def test_faster_rcnn(self): # This test also compares both paste_masks_in_image and _onnx_paste_masks_in_image # (since jit_trace witll call _onnx_paste_masks_in_image). 
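The helpers made public in the previous patch dispatch on the input type and agree across PIL images and tensors; a small sketch mirroring the new `test_image_sizes` test above (sizes chosen arbitrarily):

```python
import torch
from PIL import Image
from torchvision.transforms import functional as F

pil_img = Image.new("RGB", (18, 16))   # PIL size is (width, height)
tensor_img = torch.rand(3, 16, 18)     # tensor layout is C x H x W

assert F.get_image_size(pil_img) == F.get_image_size(tensor_img) == [18, 16]
assert F.get_image_num_channels(pil_img) == F.get_image_num_channels(tensor_img) == 3
```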
def test_paste_mask_in_image(self): - # disable profiling - torch._C._jit_set_profiling_executor(False) - torch._C._jit_set_profiling_mode(False) - masks = torch.rand(10, 1, 26, 26) boxes = torch.rand(10, 4) boxes[:, 2:] += torch.rand(10, 2) @@ -452,10 +448,6 @@ def test_mask_rcnn(self): # This test also compares both heatmaps_to_keypoints and _onnx_heatmaps_to_keypoints # (since jit_trace witll call _heatmaps_to_keypoints). def test_heatmaps_to_keypoints(self): - # disable profiling - torch._C._jit_set_profiling_executor(False) - torch._C._jit_set_profiling_mode(False) - maps = torch.rand(10, 1, 26, 26) rois = torch.rand(10, 4) from torchvision.models.detection.roi_heads import heatmaps_to_keypoints From 2365bdc617f0d4dcd85ecd3311764bd8e58541ea Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 27 Aug 2021 14:44:37 +0200 Subject: [PATCH 12/12] Simplify bug report --- .github/ISSUE_TEMPLATE/bug-report.yml | 35 ++++++++------------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index ce9ae1a313a..3da5447d6a7 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -9,14 +9,9 @@ body: - type: textarea attributes: label: 🐛 Describe the bug - description: > - A clear and concise description of what the bug is. - validations: - required: true -- type: textarea - attributes: - label: Steps/Code to Reproduce description: | + A clear and concise description of what the bug is. + Please add a minimal example so that we can reproduce the error by running the code. It is very important for he snippet to be as succinct (minimal) as possible, so please take time to trim down any irrelevant code to help us debug efficiently. We are going to copy-paste your code and we expect to get the same result as you did: avoid any external data, and include the relevant imports, etc. For example: ```python @@ -33,28 +28,18 @@ body: ``` If the code is too long (hopefully, it isn't), feel free to put it in a public gist and link it in the issue: https://gist.github.com. + + Please also paste or describe the results you observe instead of the expected results. If you observe an error, please paste the error message including the **full** traceback of the exception. It may be relevant to wrap error messages in ```` ```triple quotes blocks``` ````. placeholder: | + + ```python Sample code to reproduce the problem ``` - validations: - required: true -- type: textarea - attributes: - label: Expected Results - description: > - Please paste or describe the expected results. - placeholder: > - Example: No error is thrown. - validations: - required: true -- type: textarea - attributes: - label: Actual Results - description: > - Please paste or describe the results you observe instead of the expected results. If you observe an error, please paste the error message including the **full** traceback of the exception. It may be relevant to wrap error messages in ```` ```triple quotes blocks``` ````. - placeholder: > - Please paste or specifically describe the actual output or traceback. + + ``` + The error message you got, with the full traceback. + ```` validations: required: true - type: textarea