diff --git a/.github/workflows/blossom-ci.yml b/.github/workflows/blossom-ci.yml index 1d6ee8a46c..bf507bab3b 100644 --- a/.github/workflows/blossom-ci.yml +++ b/.github/workflows/blossom-ci.yml @@ -29,13 +29,15 @@ jobs: args: ${{ env.args }} # This job only runs for pull request comments - if: contains('\ - Nic-Ma,\ - wyli,\ - pxLi,\ - YanxuanLiu,\ - KumoLiu,\ - ', format('{0},', github.actor)) && github.event.comment.body == '/build' + if: | + github.event.comment.body == '/build' && + ( + github.actor == 'Nic-Ma' || + github.actor == 'wyli' || + github.actor == 'pxLi' || + github.actor == 'YanxuanLiu' || + github.actor == 'KumoLiu' + ) steps: - name: Check if comment is issued by authorized person run: blossom-ci diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml index b8b73907d4..d1e77bb567 100644 --- a/.github/workflows/pythonapp.yml +++ b/.github/workflows/pythonapp.yml @@ -100,6 +100,7 @@ jobs: python -m pip install --pre -U itk - name: Install the dependencies run: | + python -m pip install --user --upgrade pip wheel python -m pip install torch==1.13.1 torchvision==0.14.1 cat "requirements-dev.txt" python -m pip install -r requirements-dev.txt diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index c134724665..60b610565e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -119,7 +119,8 @@ jobs: rm -rf {*,.[^.]*} release_tag_docker: - if: github.repository == 'Project-MONAI/MONAI' + # if: github.repository == 'Project-MONAI/MONAI' + if: ${{ false }} needs: versioning runs-on: ubuntu-latest steps: diff --git a/CHANGELOG.md b/CHANGELOG.md index 61be8f07c1..38336505ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,98 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
## [Unreleased] +## [1.3.1] - 2024-05-17 +### Added +* Support for `by_measure` argument in `RemoveSmallObjects` (#7137) +* Support for `pretrained` flag in `ResNet` (#7095) +* Support for uploading and downloading bundles to and from the Hugging Face Hub (#6454) +* Added `weight` parameter in `DiceLoss` to apply weights to voxels of each class (#7158) +* Support for returning dice for each class in `DiceMetric` (#7163) +* Introduced `ComponentStore` for storage purposes (#7159) +* Added utilities used in MONAI Generative (#7134) +* Enabled Python 3.11 support for `convert_to_torchscript` and `convert_to_onnx` (#7182) +* Support for MLflow in `AutoRunner` (#7176) +* `fname_regex` option in `PydicomReader` (#7181) +* Allowed setting `AutoRunner` parameters from config (#7175) +* `VoxelMorphUNet` and `VoxelMorph` (#7178) +* Enabled `cache` option in `GridPatchDataset` (#7180) +* Introduced `class_labels` option in `write_metrics_reports` for improved readability (#7249) +* `DiffusionLoss` for image registration tasks (#7272) +* Supported specifying `filename` in `SaveImage` (#7318) +* Compile support in `SupervisedTrainer` and `SupervisedEvaluator` (#7375) +* `mlflow_experiment_name` support in `Auto3DSeg` (#7442) +* Arm support (#7500) +* `BarlowTwinsLoss` for representation learning (#7530) +* `SURELoss` and `ConjugateGradient` for diffusion models (#7308) +* Support for `CutMix`, `CutOut`, and `MixUp` augmentation techniques (#7198) +* `meta_file` and `logging_file` options to `BundleWorkflow` (#7549) +* `properties_path` option to `BundleWorkflow` for customized properties (#7542) +* Support for both soft and hard clipping in `ClipIntensityPercentiles` (#7535) +* Support for not saving artifacts in `MLFlowHandler` (#7604) +* Support for multi-channel images in `PerceptualLoss` (#7568) +* Added ResNet backbone for `FlexibleUNet` (#7571) +* Introduced `dim_head` option in `SABlock` to set dimensions for each head (#7664) +* Direct links from the docs to the GitHub source code (#7738, #7779) +#### misc.
+* Refactored `list_data_collate` and `collate_meta_tensor` to utilize the latest PyTorch API (#7165) +* Added `__str__` method to the `Metric` base class (#7487) +* Made enhancements to testing files (#7662, #7670, #7663, #7671, #7672) +* Improved documentation for bundles (#7116) +### Fixed +#### transforms +* Addressed issue where lazy mode was ignored in `SpatialPadd` (#7316) +* Tracked applied operations in `ImageFilter` (#7395) +* Warnings in `generate_label_classes_crop_centers` are now given only if the ratio of a missing class is not set to 0 (#7602) +* Input is now always converted to C-order in `distance_transform_edt` to ensure consistent behavior (#7675) +#### data +* Modified .npz file behavior to use keys in `NumpyReader` (#7148) +* Handled corrupted cached files in `PersistentDataset` (#7244) +* Corrected affine update in `NrrdReader` (#7415) +#### metrics and losses +* Addressed precision issue in `get_confusion_matrix` (#7187) +* Harmonized and clarified documentation and tests for dice loss variants (#7587) +#### networks +* Removed hard-coded `spatial_dims` in `SwinTransformer` (#7302) +* Fixed learnable `position_embeddings` in `PatchEmbeddingBlock` (#7564, #7605) +* Removed `memory_pool_limit` in TRT config (#7647) +* Propagated `kernel_size` to `ConvBlocks` within `AttentionUnet` (#7734) +* Addressed hard-coded activation layer in `ResNet` (#7749) +#### bundle +* Resolved bundle download issue (#7280) +* Updated `bundle_root` directory for `NNIGen` (#7586) +* Checked for `num_fold` and failed early if incorrect (#7634) +* Enhanced logging logic in `ConfigWorkflow` (#7745) +#### misc. +* Enabled chaining in `Auto3DSeg` CLI (#7168) +* Addressed useless error message in `nnUNetV2Runner` (#7217) +* Resolved typing and deprecation issues in Mypy (#7231) +* Quoted `$PY_EXE` variable to handle Python paths that contain spaces in Bash (#7268) +* Improved documentation, code examples, and warning messages in various modules (#7234, #7213, #7271, #7326, #7569, #7584) +* Fixed typos in various modules (#7321, #7322, #7458, #7595, #7612) +* Enhanced docstrings in various modules (#7245, #7381, #7746) +* Handled error when data is on CPU in `DataAnalyzer` (#7310) +* Updated version requirements for third-party packages (#7343, #7344, #7384, #7448, #7659, #7704, #7744, #7742, #7780) +* Addressed incorrect slice compute in `ImageStats` (#7374) +* Avoided editing a loop's mutable iterable to address B308 (#7397) +* Fixed issue with `CUDA_VISIBLE_DEVICES` setting being ignored (#7408, #7581) +* Avoided changing Python version in CI/CD (#7424) +* Renamed `partial` to `callable` in instantiate mode (#7413) +* Imported `AttributeError` for Python 3.12 compatibility (#7482) +* Updated `nnUNetV2Runner` to support nnunetv2 2.2 (#7483) +* Used `uint8` instead of `int8` in `LabelStats` (#7489) +* Utilized `subprocess` for nnUNet training (#7576) +* Addressed a deprecation warning in ruff (#7625) +* Fixed download failures on FIPS machines (#7698) +* Updated `torch_tensorrt` compile parameters to avoid warning (#7714) +* Restricted `Auto3DSeg` fold input based on the datalist (#7778) +### Changed +* Base Docker image upgraded to `nvcr.io/nvidia/pytorch:24.03-py3` from `nvcr.io/nvidia/pytorch:23.08-py3` +### Removed +* Removed unrecommended star-arg unpacking after a keyword argument, addressed B026 (#7262) +* Skipped old PyTorch version test for `SwinUNETR` (#7266) +* Dropped docker build workflow and migrated to NVIDIA Blossom system (#7450) +* Dropped Python 3.8 test on quick-py3 workflow (#7719) + ## [1.3.0] - 2023-10-12 ###
Added * Intensity transforms `ScaleIntensityFixedMean` and `RandScaleIntensityFixedMean` (#6542) @@ -943,7 +1035,8 @@ the postprocessing steps should be used before calling the metrics methods [highlights]: https://github.com/Project-MONAI/MONAI/blob/master/docs/source/highlights.md -[Unreleased]: https://github.com/Project-MONAI/MONAI/compare/1.3.0...HEAD +[Unreleased]: https://github.com/Project-MONAI/MONAI/compare/1.3.1...HEAD +[1.3.1]: https://github.com/Project-MONAI/MONAI/compare/1.3.0...1.3.1 [1.3.0]: https://github.com/Project-MONAI/MONAI/compare/1.2.0...1.3.0 [1.2.0]: https://github.com/Project-MONAI/MONAI/compare/1.1.0...1.2.0 [1.1.0]: https://github.com/Project-MONAI/MONAI/compare/1.0.1...1.1.0 diff --git a/CITATION.cff b/CITATION.cff index cac47faae4..4754c5b2e3 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -6,8 +6,8 @@ title: "MONAI: Medical Open Network for AI" abstract: "AI Toolkit for Healthcare Imaging" authors: - name: "MONAI Consortium" -date-released: 2023-10-12 -version: "1.3.0" +date-released: 2024-05-21 +version: "1.3.1" identifiers: - description: "This DOI represents all versions of MONAI, and will always resolve to the latest one." type: doi diff --git a/README.md b/README.md index 7565fea1b7..5345cdb926 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,6 @@ [![premerge](https://github.com/Project-MONAI/MONAI/actions/workflows/pythonapp.yml/badge.svg?branch=dev)](https://github.com/Project-MONAI/MONAI/actions/workflows/pythonapp.yml) [![postmerge](https://img.shields.io/github/checks-status/project-monai/monai/dev?label=postmerge)](https://github.com/Project-MONAI/MONAI/actions?query=branch%3Adev) -[![docker](https://github.com/Project-MONAI/MONAI/actions/workflows/docker.yml/badge.svg?branch=dev)](https://github.com/Project-MONAI/MONAI/actions/workflows/docker.yml) [![Documentation Status](https://readthedocs.org/projects/monai/badge/?version=latest)](https://docs.monai.io/en/latest/) [![codecov](https://codecov.io/gh/Project-MONAI/MONAI/branch/dev/graph/badge.svg?token=6FTC7U1JJ4)](https://codecov.io/gh/Project-MONAI/MONAI) diff --git a/docs/requirements.txt b/docs/requirements.txt index 5acc437391..007281ac35 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -21,8 +21,8 @@ sphinxcontrib-serializinghtml sphinx-autodoc-typehints==1.11.1 pandas einops -transformers<4.22; python_version <= '3.10' # https://github.com/Project-MONAI/MONAI/issues/5157 -mlflow>=1.28.0, <=2.11.3 +transformers>=4.36.0, <4.41.0; python_version <= '3.10' +mlflow>=2.12.2 clearml>=1.10.0rc0 tensorboardX imagecodecs; platform_system == "Linux" or platform_system == "Darwin" diff --git a/docs/source/conf.py b/docs/source/conf.py index 827626d12e..a91f38081f 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -97,7 +97,7 @@ def generate_apidocs(*args): "sphinx.ext.mathjax", "sphinx.ext.napoleon", "sphinx.ext.autodoc", - "sphinx.ext.viewcode", + "sphinx.ext.linkcode", "sphinx.ext.autosectionlabel", "sphinx.ext.autosummary", "sphinx_autodoc_typehints", @@ -140,7 +140,7 @@ def generate_apidocs(*args): "github_repo": "MONAI", "github_version": "dev", "doc_path": "docs/source", - "conf_py_path": "/docs/", + "conf_py_path": "/docs/source", "VERSION": version, } html_scaled_image_link = False @@ -167,11 +167,24 @@ def setup(app): # -- Linkcode configuration -------------------------------------------------- +DEFAULT_REF = "dev" +read_the_docs_ref = os.environ.get("READTHEDOCS_GIT_IDENTIFIER", None) +if read_the_docs_ref: + # When building on ReadTheDocs, link to the 
specific commit + # https://docs.readthedocs.io/en/stable/reference/environment-variables.html#envvar-READTHEDOCS_GIT_IDENTIFIER + git_ref = read_the_docs_ref +elif os.environ.get("GITHUB_REF_TYPE", "branch") == "tag": + # When building a tag, link to the tag itself + git_ref = os.environ.get("GITHUB_REF", DEFAULT_REF) +else: + git_ref = os.environ.get("GITHUB_SHA", DEFAULT_REF) + DEFAULT_REPOSITORY = "Project-MONAI/MONAI" repository = os.environ.get("GITHUB_REPOSITORY", DEFAULT_REPOSITORY) -base_code_url = f"https://github.com/{repository}/blob/{version}" +base_code_url = f"https://github.com/{repository}/blob/{git_ref}" MODULE_ROOT_FOLDER = "monai" +repo_root_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) # Adjusted from https://github.com/python-websockets/websockets/blob/main/docs/conf.py @@ -201,7 +214,7 @@ def linkcode_resolve(domain, info): except TypeError: # e.g. object is a typing.Union return None - file = os.path.relpath(file, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))) + file = os.path.relpath(file, repo_root_path) if not file.startswith(MODULE_ROOT_FOLDER): # e.g. object is a typing.NewType return None diff --git a/docs/source/networks.rst index 8321fed1a4..c51f5c88b1 100644 --- a/docs/source/networks.rst +++ b/docs/source/networks.rst @@ -426,7 +426,6 @@ Layers .. autoclass:: monai.networks.layers.vector_quantizer.VectorQuantizer :members: -======= `ConjugateGradient` ~~~~~~~~~~~~~~~~~~~ .. autoclass:: ConjugateGradient diff --git a/monai/apps/auto3dseg/auto_runner.py index 05c961f999..5b6b501555 100644 --- a/monai/apps/auto3dseg/auto_runner.py +++ b/monai/apps/auto3dseg/auto_runner.py @@ -499,8 +499,8 @@ def set_num_fold(self, num_fold: int = 5) -> AutoRunner: if num_fold <= 0: raise ValueError(f"num_fold is expected to be an integer greater than zero. Now it gets {num_fold}") - if num_fold > self.max_fold + 1: - # Auto3DSeg allows no validation set, so the maximum fold number is max_fold + 1 + if num_fold > self.max_fold: + # Auto3DSeg must contain a validation set, so the maximum fold number is max_fold. raise ValueError( f"num_fold is greater than the maximum fold number {self.max_fold} in {self.datalist_filename}."
) diff --git a/monai/apps/detection/utils/anchor_utils.py b/monai/apps/detection/utils/anchor_utils.py index 283169b653..cbde3ebae9 100644 --- a/monai/apps/detection/utils/anchor_utils.py +++ b/monai/apps/detection/utils/anchor_utils.py @@ -189,7 +189,7 @@ def generate_anchors( w_ratios = 1 / area_scale h_ratios = area_scale # if 3d, w:h:d = 1:aspect_ratios[:,0]:aspect_ratios[:,1] - elif self.spatial_dims == 3: + else: area_scale = torch.pow(aspect_ratios_t[:, 0] * aspect_ratios_t[:, 1], 1 / 3.0) w_ratios = 1 / area_scale h_ratios = aspect_ratios_t[:, 0] / area_scale @@ -199,7 +199,7 @@ def generate_anchors( hs = (h_ratios[:, None] * scales_t[None, :]).view(-1) if self.spatial_dims == 2: base_anchors = torch.stack([-ws, -hs, ws, hs], dim=1) / 2.0 - elif self.spatial_dims == 3: + else: # elif self.spatial_dims == 3: ds = (d_ratios[:, None] * scales_t[None, :]).view(-1) base_anchors = torch.stack([-ws, -hs, -ds, ws, hs, ds], dim=1) / 2.0 diff --git a/monai/apps/pathology/transforms/post/array.py b/monai/apps/pathology/transforms/post/array.py index 99e94f89c0..0f57fb41cb 100644 --- a/monai/apps/pathology/transforms/post/array.py +++ b/monai/apps/pathology/transforms/post/array.py @@ -379,6 +379,7 @@ def _generate_contour_coord(self, current: np.ndarray, previous: np.ndarray) -> """ p_delta = (current[0] - previous[0], current[1] - previous[1]) + row, col = -1, -1 if p_delta in ((0.0, 1.0), (0.5, 0.5), (1.0, 0.0)): row = int(current[0] + 0.5) diff --git a/monai/bundle/utils.py b/monai/bundle/utils.py index a0f39d236f..0f17422ba5 100644 --- a/monai/bundle/utils.py +++ b/monai/bundle/utils.py @@ -221,6 +221,7 @@ def load_bundle_config(bundle_path: str, *config_names: str, **load_kw_args: Any raise ValueError(f"Cannot find config file '{full_cname}'") ardata = archive.read(full_cname) + cdata = {} if full_cname.lower().endswith("json"): cdata = json.loads(ardata, **load_kw_args) diff --git a/monai/bundle/workflows.py b/monai/bundle/workflows.py index b42852cb0f..11c9bf0562 100644 --- a/monai/bundle/workflows.py +++ b/monai/bundle/workflows.py @@ -308,7 +308,6 @@ def __init__( super().__init__(workflow_type=workflow_type, meta_file=meta_file, properties_path=properties_path) self.config_root_path = config_root_path logging_file = str(self.config_root_path / "logging.conf") if logging_file is None else logging_file - if logging_file is False: logger.warn(f"Logging file is set to {logging_file}, skipping logging.") else: diff --git a/monai/data/dataset.py b/monai/data/dataset.py index 79e066303e..871b523289 100644 --- a/monai/data/dataset.py +++ b/monai/data/dataset.py @@ -36,15 +36,7 @@ from monai.data.meta_tensor import MetaTensor from monai.data.utils import SUPPORTED_PICKLE_MOD, convert_tables_to_dicts, pickle_hashing -from monai.transforms import ( - Compose, - Randomizable, - RandomizableTrait, - Transform, - apply_transform, - convert_to_contiguous, - reset_ops_id, -) +from monai.transforms import Compose, Randomizable, RandomizableTrait, Transform, convert_to_contiguous, reset_ops_id from monai.utils import MAX_SEED, convert_to_tensor, get_seed, look_up_option, min_version, optional_import from monai.utils.misc import first @@ -77,15 +69,19 @@ class Dataset(_TorchDataset): }, }, }] """ - def __init__(self, data: Sequence, transform: Callable | None = None) -> None: + def __init__(self, data: Sequence, transform: Sequence[Callable] | Callable | None = None) -> None: """ Args: data: input data to load and transform to generate dataset for model. 
- transform: a callable data transform on input data. - + transform: a callable, a sequence of callables, or None. If `transform` is not + a `Compose` instance, it will be wrapped in a `Compose` instance. Sequences + of callables are applied in order, and if `None` is passed, the data is returned as is. """ self.data = data - self.transform: Any = transform + try: + self.transform = Compose(transform) if not isinstance(transform, Compose) else transform + except Exception as e: + raise ValueError("`transform` must be a callable or a sequence of callables that can be composed") from e def __len__(self) -> int: return len(self.data) @@ -95,7 +91,7 @@ def _transform(self, index: int): """ Fetch single data item from `self.data`. """ data_i = self.data[index] - return apply_transform(self.transform, data_i) if self.transform is not None else data_i + return self.transform(data_i) def __getitem__(self, index: int | slice | Sequence[int]): """ @@ -264,8 +260,6 @@ def __init__( using the cached content and with re-created transform instances. """ - if not isinstance(transform, Compose): - transform = Compose(transform) super().__init__(data=data, transform=transform) self.cache_dir = Path(cache_dir) if cache_dir is not None else None self.hash_func = hash_func @@ -323,9 +317,6 @@ def _pre_transform(self, item_transformed): random transform object """ - if not isinstance(self.transform, Compose): - raise ValueError("transform must be an instance of monai.transforms.Compose.") - first_random = self.transform.get_index_of_first( lambda t: isinstance(t, RandomizableTrait) or not isinstance(t, Transform) ) @@ -346,9 +337,6 @@ def _post_transform(self, item_transformed): the transformed element through the random transforms """ - if not isinstance(self.transform, Compose): - raise ValueError("transform must be an instance of monai.transforms.Compose.") - first_random = self.transform.get_index_of_first( lambda t: isinstance(t, RandomizableTrait) or not isinstance(t, Transform) ) @@ -501,9 +489,6 @@ def _pre_transform(self, item_transformed): Returns: the transformed element up to the N transform object """ - if not isinstance(self.transform, Compose): - raise ValueError("transform must be an instance of monai.transforms.Compose.") - item_transformed = self.transform(item_transformed, end=self.cache_n_trans, threading=True) reset_ops_id(item_transformed) @@ -519,9 +504,6 @@ def _post_transform(self, item_transformed): Returns: the final transformed result """ - if not isinstance(self.transform, Compose): - raise ValueError("transform must be an instance of monai.transforms.Compose.") - return self.transform(item_transformed, start=self.cache_n_trans) @@ -809,8 +791,6 @@ def __init__( Not following these recommendations may lead to runtime errors or duplicated cache across processes.
""" - if not isinstance(transform, Compose): - transform = Compose(transform) super().__init__(data=data, transform=transform) self.set_num = cache_num # tracking the user-provided `cache_num` option self.set_rate = cache_rate # tracking the user-provided `cache_rate` option @@ -1282,8 +1262,10 @@ def to_list(x): data = [] for dataset in self.data: data.extend(to_list(dataset[index])) + if self.transform is not None: - data = apply_transform(self.transform, data, map_items=False) # transform the list data + self.transform.map_items = False # set the Compose object's map_items to False so the transform is applied to the whole list + data = self.transform(data) # use tuple instead of list as the default collate_fn callback of MONAI DataLoader flattens nested lists return tuple(data) @@ -1432,15 +1414,11 @@ def __len__(self): def _transform(self, index: int): data = {k: v[index] for k, v in self.arrays.items()} - - if not self.transform: - return data - - result = apply_transform(self.transform, data) + result = self.transform(data) if self.transform is not None else data if isinstance(result, dict) or (isinstance(result, list) and isinstance(result[0], dict)): return result - raise AssertionError("With a dict supplied to apply_transform, should return a dict or a list of dicts.") + raise AssertionError("With a dict supplied to Compose, should return a dict or a list of dicts.") class CSVDataset(Dataset): diff --git a/monai/data/dataset_summary.py index 769ae33b46..5b9e32afca 100644 --- a/monai/data/dataset_summary.py +++ b/monai/data/dataset_summary.py @@ -84,6 +84,7 @@ def collect_meta_data(self): """ for data in self.data_loader: + meta_dict = {} if isinstance(data[self.image_key], MetaTensor): meta_dict = data[self.image_key].meta elif self.meta_key in data: diff --git a/monai/data/image_reader.py index 2361bb63a7..f5e199e2a3 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -1331,7 +1331,7 @@ def get_data(self, img: NrrdImage | list[NrrdImage]) -> tuple[np.ndarray, dict]: header[MetaKeys.SPACE] = SpaceKeys.LPS # assuming LPS if not specified header[MetaKeys.AFFINE] = header[MetaKeys.ORIGINAL_AFFINE].copy() - header[MetaKeys.SPATIAL_SHAPE] = header["sizes"] + header[MetaKeys.SPATIAL_SHAPE] = header["sizes"].copy() [header.pop(k) for k in ("sizes", "space origin", "space directions")] # rm duplicated data in header if self.channel_dim is None: # default to "no_channel" or -1 diff --git a/monai/data/torchscript_utils.py index cabf06ce89..507cf411d6 100644 --- a/monai/data/torchscript_utils.py +++ b/monai/data/torchscript_utils.py @@ -116,7 +116,7 @@ def load_net_with_metadata( Returns: Triple containing loaded object, metadata dict, and extra files dict containing other file data if present """ - extra_files = {f: "" for f in more_extra_files} + extra_files = dict.fromkeys(more_extra_files, "") extra_files[METADATA_FILENAME] = "" jit_obj = torch.jit.load(filename_prefix_or_stream, map_location, extra_files) diff --git a/monai/data/utils.py index 585f02ec9e..7a08300abb 100644 --- a/monai/data/utils.py +++ b/monai/data/utils.py @@ -53,10 +53,6 @@ pytorch_after, ) -if pytorch_after(1, 13): - # import private code for reuse purposes, comment in case things break in the future - from torch.utils.data._utils.collate import collate_tensor_fn, default_collate_fn_map - pd, _ = optional_import("pandas") DataFrame, _ = optional_import("pandas", name="DataFrame") nib, _ =
optional_import("nibabel") @@ -454,8 +450,13 @@ def collate_meta_tensor_fn(batch, *, collate_fn_map=None): Collate a sequence of meta tensor into a single batched metatensor. This is called by `collage_meta_tensor` and so should not be used as a collate function directly in dataloaders. """ - collate_fn = collate_tensor_fn if pytorch_after(1, 13) else default_collate - collated = collate_fn(batch) # type: ignore + if pytorch_after(1, 13): + from torch.utils.data._utils.collate import collate_tensor_fn # imported here for pylint/mypy issues + + collated = collate_tensor_fn(batch) + else: + collated = default_collate(batch) + meta_dicts = [i.meta or TraceKeys.NONE for i in batch] common_ = set.intersection(*[set(d.keys()) for d in meta_dicts if isinstance(d, dict)]) if common_: @@ -496,6 +497,8 @@ def list_data_collate(batch: Sequence): if pytorch_after(1, 13): # needs to go here to avoid circular import + from torch.utils.data._utils.collate import default_collate_fn_map + from monai.data.meta_tensor import MetaTensor default_collate_fn_map.update({MetaTensor: collate_meta_tensor_fn}) diff --git a/monai/networks/blocks/spatialattention.py b/monai/networks/blocks/spatialattention.py index 020d8d23fd..75319853d9 100644 --- a/monai/networks/blocks/spatialattention.py +++ b/monai/networks/blocks/spatialattention.py @@ -68,7 +68,7 @@ def forward(self, x: torch.Tensor): h, w = x.shape[2], x.shape[3] rearrange_input = Rearrange("b c h w -> b (h w) c") rearrange_output = Rearrange("b (h w) c -> b c h w", h=h, w=w) - if self.spatial_dims == 3: + else: h, w, d = x.shape[2], x.shape[3], x.shape[4] rearrange_input = Rearrange("b c h w d -> b (h w d) c") rearrange_output = Rearrange("b (h w d) c -> b c h w d", h=h, w=w, d=d) diff --git a/monai/networks/nets/daf3d.py b/monai/networks/nets/daf3d.py index c9a18c746a..02e5bb022a 100644 --- a/monai/networks/nets/daf3d.py +++ b/monai/networks/nets/daf3d.py @@ -13,6 +13,7 @@ from collections import OrderedDict from collections.abc import Callable, Sequence +from functools import partial import torch import torch.nn as nn @@ -25,6 +26,7 @@ from monai.networks.blocks.convolutions import Convolution from monai.networks.blocks.feature_pyramid_network import ExtraFPNBlock, FeaturePyramidNetwork from monai.networks.layers.factories import Conv, Norm +from monai.networks.layers.utils import get_norm_layer from monai.networks.nets.resnet import ResNet, ResNetBottleneck __all__ = [ @@ -170,33 +172,37 @@ class Daf3dResNetBottleneck(ResNetBottleneck): spatial_dims: number of spatial dimensions of the input image. stride: stride to use for second conv layer. downsample: which downsample layer to use. + norm: which normalization layer to use. Defaults to group. 
""" expansion = 2 - def __init__(self, in_planes, planes, spatial_dims=3, stride=1, downsample=None): - norm_type: Callable = Norm[Norm.GROUP, spatial_dims] + def __init__( + self, in_planes, planes, spatial_dims=3, stride=1, downsample=None, norm=("group", {"num_groups": 32}) + ): conv_type: Callable = Conv[Conv.CONV, spatial_dims] + norm_layer = partial(get_norm_layer, name=norm, spatial_dims=spatial_dims) + # in case downsample uses batch norm, change to group norm if isinstance(downsample, nn.Sequential): downsample = nn.Sequential( conv_type(in_planes, planes * self.expansion, kernel_size=1, stride=stride, bias=False), - norm_type(num_groups=32, num_channels=planes * self.expansion), + norm_layer(channels=planes * self.expansion), ) super().__init__(in_planes, planes, spatial_dims, stride, downsample) # change norm from batch to group norm - self.bn1 = norm_type(num_groups=32, num_channels=planes) - self.bn2 = norm_type(num_groups=32, num_channels=planes) - self.bn3 = norm_type(num_groups=32, num_channels=planes * self.expansion) + self.bn1 = norm_layer(channels=planes) + self.bn2 = norm_layer(channels=planes) + self.bn3 = norm_layer(channels=planes * self.expansion) # adapt second convolution to work with groups self.conv2 = conv_type(planes, planes, kernel_size=3, padding=1, stride=stride, groups=32, bias=False) # adapt activation function - self.relu = nn.PReLU() # type: ignore + self.relu = nn.PReLU() class Daf3dResNetDilatedBottleneck(Daf3dResNetBottleneck): @@ -212,8 +218,10 @@ class Daf3dResNetDilatedBottleneck(Daf3dResNetBottleneck): downsample: which downsample layer to use. """ - def __init__(self, in_planes, planes, spatial_dims=3, stride=1, downsample=None): - super().__init__(in_planes, planes, spatial_dims, stride, downsample) + def __init__( + self, in_planes, planes, spatial_dims=3, stride=1, downsample=None, norm=("group", {"num_groups": 32}) + ): + super().__init__(in_planes, planes, spatial_dims, stride, downsample, norm) # add dilation in second convolution conv_type: Callable = Conv[Conv.CONV, spatial_dims] @@ -287,7 +295,7 @@ def __init__( n_input_channels, self.in_planes, kernel_size=7, stride=(1, 2, 2), padding=(3, 3, 3), bias=False ) self.bn1 = norm_type(32, 64) - self.relu = nn.PReLU() # type: ignore + self.relu = nn.PReLU() # adapt layers to our needs self.layer1 = self._make_layer(Daf3dResNetBottleneck, block_inplanes[0], layers[0], spatial_dims, shortcut_type) diff --git a/monai/networks/nets/quicknat.py b/monai/networks/nets/quicknat.py index cbcccf24d7..bbc4e7e490 100644 --- a/monai/networks/nets/quicknat.py +++ b/monai/networks/nets/quicknat.py @@ -168,6 +168,8 @@ def _get_layer(self, in_channels, out_channels, dilation): def forward(self, input, _): i = 0 result = input + result1 = input # this will not stay this value, needed here for pylint/mypy + for l in self.children(): # ignoring the max (un-)pool and droupout already added in the initial initialization step if isinstance(l, (nn.MaxPool2d, nn.MaxUnpool2d, nn.Dropout2d)): diff --git a/monai/networks/nets/resnet.py b/monai/networks/nets/resnet.py index 74d15bc6bf..6e61db07ca 100644 --- a/monai/networks/nets/resnet.py +++ b/monai/networks/nets/resnet.py @@ -22,8 +22,8 @@ import torch.nn as nn from monai.networks.blocks.encoder import BaseEncoder -from monai.networks.layers.factories import Conv, Norm, Pool -from monai.networks.layers.utils import get_pool_layer +from monai.networks.layers.factories import Conv, Pool +from monai.networks.layers.utils import get_act_layer, get_norm_layer, 
get_pool_layer from monai.utils import ensure_tuple_rep from monai.utils.module import look_up_option, optional_import @@ -57,7 +57,6 @@ "resnet200": ("bottleneck", [3, 24, 36, 3], "B", False, False), } - logger = logging.getLogger(__name__) @@ -79,6 +78,8 @@ def __init__( spatial_dims: int = 3, stride: int = 1, downsample: nn.Module | partial | None = None, + act: str | tuple = ("relu", {"inplace": True}), + norm: str | tuple = "batch", ) -> None: """ Args: @@ -87,17 +88,18 @@ def __init__( spatial_dims: number of spatial dimensions of the input image. stride: stride to use for first conv layer. downsample: which downsample layer to use. + act: activation type and arguments. Defaults to relu. + norm: feature normalization type and arguments. Defaults to batch norm. """ super().__init__() conv_type: Callable = Conv[Conv.CONV, spatial_dims] - norm_type: Callable = Norm[Norm.BATCH, spatial_dims] self.conv1 = conv_type(in_planes, planes, kernel_size=3, padding=1, stride=stride, bias=False) - self.bn1 = norm_type(planes) - self.relu = nn.ReLU(inplace=True) + self.bn1 = get_norm_layer(name=norm, spatial_dims=spatial_dims, channels=planes) + self.act = get_act_layer(name=act) self.conv2 = conv_type(planes, planes, kernel_size=3, padding=1, bias=False) - self.bn2 = norm_type(planes) + self.bn2 = get_norm_layer(name=norm, spatial_dims=spatial_dims, channels=planes) self.downsample = downsample self.stride = stride @@ -106,7 +108,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: out: torch.Tensor = self.conv1(x) out = self.bn1(out) - out = self.relu(out) + out = self.act(out) out = self.conv2(out) out = self.bn2(out) @@ -115,7 +117,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: residual = self.downsample(x) out += residual - out = self.relu(out) + out = self.act(out) return out @@ -130,6 +132,8 @@ def __init__( spatial_dims: int = 3, stride: int = 1, downsample: nn.Module | partial | None = None, + act: str | tuple = ("relu", {"inplace": True}), + norm: str | tuple = "batch", ) -> None: """ Args: @@ -138,20 +142,22 @@ def __init__( spatial_dims: number of spatial dimensions of the input image. stride: stride to use for second conv layer. downsample: which downsample layer to use. + act: activation type and arguments. Defaults to relu. + norm: feature normalization type and arguments. Defaults to batch norm. 
""" super().__init__() conv_type: Callable = Conv[Conv.CONV, spatial_dims] - norm_type: Callable = Norm[Norm.BATCH, spatial_dims] + norm_layer = partial(get_norm_layer, name=norm, spatial_dims=spatial_dims) self.conv1 = conv_type(in_planes, planes, kernel_size=1, bias=False) - self.bn1 = norm_type(planes) + self.bn1 = norm_layer(channels=planes) self.conv2 = conv_type(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) - self.bn2 = norm_type(planes) + self.bn2 = norm_layer(channels=planes) self.conv3 = conv_type(planes, planes * self.expansion, kernel_size=1, bias=False) - self.bn3 = norm_type(planes * self.expansion) - self.relu = nn.ReLU(inplace=True) + self.bn3 = norm_layer(channels=planes * self.expansion) + self.act = get_act_layer(name=act) self.downsample = downsample self.stride = stride @@ -160,11 +166,11 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: out: torch.Tensor = self.conv1(x) out = self.bn1(out) - out = self.relu(out) + out = self.act(out) out = self.conv2(out) out = self.bn2(out) - out = self.relu(out) + out = self.act(out) out = self.conv3(out) out = self.bn3(out) @@ -173,7 +179,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: residual = self.downsample(x) out += residual - out = self.relu(out) + out = self.act(out) return out @@ -203,6 +209,8 @@ class ResNet(nn.Module): num_classes: number of output (classifications). feed_forward: whether to add the FC layer for the output, default to `True`. bias_downsample: whether to use bias term in the downsampling block when `shortcut_type` is 'B', default to `True`. + act: activation type and arguments. Defaults to relu. + norm: feature normalization type and arguments. Defaults to batch norm. """ @@ -221,6 +229,8 @@ def __init__( num_classes: int = 400, feed_forward: bool = True, bias_downsample: bool = True, # for backwards compatibility (also see PR #5477) + act: str | tuple = ("relu", {"inplace": True}), + norm: str | tuple = "batch", ) -> None: super().__init__() @@ -233,7 +243,6 @@ def __init__( raise ValueError("Unknown block '%s', use basic or bottleneck" % block) conv_type: type[nn.Conv1d | nn.Conv2d | nn.Conv3d] = Conv[Conv.CONV, spatial_dims] - norm_type: type[nn.BatchNorm1d | nn.BatchNorm2d | nn.BatchNorm3d] = Norm[Norm.BATCH, spatial_dims] pool_type: type[nn.MaxPool1d | nn.MaxPool2d | nn.MaxPool3d] = Pool[Pool.MAX, spatial_dims] avgp_type: type[nn.AdaptiveAvgPool1d | nn.AdaptiveAvgPool2d | nn.AdaptiveAvgPool3d] = Pool[ Pool.ADAPTIVEAVG, spatial_dims @@ -257,8 +266,10 @@ def __init__( padding=tuple(k // 2 for k in conv1_kernel_size), bias=False, ) - self.bn1 = norm_type(self.in_planes) - self.relu = nn.ReLU(inplace=True) + + norm_layer = get_norm_layer(name=norm, spatial_dims=spatial_dims, channels=self.in_planes) + self.bn1 = norm_layer + self.act = get_act_layer(name=act) self.maxpool = pool_type(kernel_size=3, stride=2, padding=1) self.layer1 = self._make_layer(block, block_inplanes[0], layers[0], spatial_dims, shortcut_type) self.layer2 = self._make_layer(block, block_inplanes[1], layers[1], spatial_dims, shortcut_type, stride=2) @@ -270,7 +281,7 @@ def __init__( for m in self.modules(): if isinstance(m, conv_type): nn.init.kaiming_normal_(torch.as_tensor(m.weight), mode="fan_out", nonlinearity="relu") - elif isinstance(m, norm_type): + elif isinstance(m, type(norm_layer)): nn.init.constant_(torch.as_tensor(m.weight), 1) nn.init.constant_(torch.as_tensor(m.bias), 0) elif isinstance(m, nn.Linear): @@ -290,9 +301,9 @@ def _make_layer( spatial_dims: int, shortcut_type: str, stride: 
int = 1, + norm: str | tuple = "batch", ) -> nn.Sequential: conv_type: Callable = Conv[Conv.CONV, spatial_dims] - norm_type: Callable = Norm[Norm.BATCH, spatial_dims] downsample: nn.Module | partial | None = None if stride != 1 or self.in_planes != planes * block.expansion: @@ -312,25 +323,30 @@ def _make_layer( stride=stride, bias=self.bias_downsample, ), - norm_type(planes * block.expansion), + get_norm_layer(name=norm, spatial_dims=spatial_dims, channels=planes * block.expansion), ) layers = [ block( - in_planes=self.in_planes, planes=planes, spatial_dims=spatial_dims, stride=stride, downsample=downsample + in_planes=self.in_planes, + planes=planes, + spatial_dims=spatial_dims, + stride=stride, + downsample=downsample, + norm=norm, ) ] self.in_planes = planes * block.expansion for _i in range(1, blocks): - layers.append(block(self.in_planes, planes, spatial_dims=spatial_dims)) + layers.append(block(self.in_planes, planes, spatial_dims=spatial_dims, norm=norm)) return nn.Sequential(*layers) def forward(self, x: torch.Tensor) -> torch.Tensor: x = self.conv1(x) x = self.bn1(x) - x = self.relu(x) + x = self.act(x) if not self.no_max_pool: x = self.maxpool(x) @@ -397,7 +413,7 @@ def forward(self, inputs: torch.Tensor): """ x = self.conv1(inputs) x = self.bn1(x) - x = self.relu(x) + x = self.act(x) features = [] features.append(x) diff --git a/monai/networks/nets/swin_unetr.py index 6f96dfd291..3900c866b3 100644 --- a/monai/networks/nets/swin_unetr.py +++ b/monai/networks/nets/swin_unetr.py @@ -347,7 +347,7 @@ def window_partition(x, window_size): x: input tensor. window_size: local window size. """ - x_shape = x.size() + x_shape = x.size() # length 4 or 5 only if len(x_shape) == 5: b, d, h, w, c = x_shape x = x.view( @@ -363,10 +363,11 @@ def window_partition(x, window_size): windows = ( x.permute(0, 1, 3, 5, 2, 4, 6, 7).contiguous().view(-1, window_size[0] * window_size[1] * window_size[2], c) ) - elif len(x_shape) == 4: + else: # if len(x_shape) == 4: b, h, w, c = x.shape x = x.view(b, h // window_size[0], window_size[0], w // window_size[1], window_size[1], c) windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size[0] * window_size[1], c) + return windows @@ -613,7 +614,7 @@ def forward_part1(self, x, mask_matrix): _, dp, hp, wp, _ = x.shape dims = [b, dp, hp, wp] - elif len(x_shape) == 4: + else: # elif len(x_shape) == 4 b, h, w, c = x.shape window_size, shift_size = get_window_size((h, w), self.window_size, self.shift_size) pad_l = pad_t = 0 diff --git a/monai/networks/schedulers/ddim.py index 19e24d94b8..2a0121d063 100644 --- a/monai/networks/schedulers/ddim.py +++ b/monai/networks/schedulers/ddim.py @@ -184,6 +184,10 @@ def step( beta_prod_t = 1 - alpha_prod_t + # predefinitions to satisfy pylint/mypy; these values will be overwritten below + pred_original_sample = sample + pred_epsilon = model_output + # 3. compute predicted original sample from predicted noise also called # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf if self.prediction_type == DDIMPredictionType.EPSILON: @@ -258,6 +262,10 @@ def reversed_step( beta_prod_t = 1 - alpha_prod_t + # predefinitions to satisfy pylint/mypy; these values will be overwritten below + pred_original_sample = sample + pred_epsilon = model_output + # 3.
compute predicted original sample from predicted noise also called # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf diff --git a/monai/transforms/__init__.py index ab9adb6a99..ef1da2d855 100644 --- a/monai/transforms/__init__.py +++ b/monai/transforms/__init__.py @@ -671,6 +671,7 @@ in_bounds, is_empty, is_positive, + map_and_generate_sampling_centers, map_binary_to_indices, map_classes_to_indices, map_spatial_axes, diff --git a/monai/transforms/regularization/array.py index 0b495c8623..9186a5c46f 100644 --- a/monai/transforms/regularization/array.py +++ b/monai/transforms/regularization/array.py @@ -16,6 +16,9 @@ import torch +from monai.data.meta_obj import get_track_meta +from monai.utils.type_conversion import convert_to_dst_type, convert_to_tensor + from ..transform import RandomizableTransform __all__ = ["MixUp", "CutMix", "CutOut", "Mixer"] @@ -53,9 +56,11 @@ def randomize(self, data=None) -> None: as needed. You need to call this method every time you apply the transform to a new batch. """ + super().randomize(None) self._params = ( torch.from_numpy(self.R.beta(self.alpha, self.alpha, self.batch_size)).type(torch.float32), self.R.permutation(self.batch_size), + [torch.from_numpy(self.R.randint(0, d, size=(1,))) for d in data.shape[2:]] if data is not None else [], ) @@ -69,7 +74,7 @@ class MixUp(Mixer): """ def apply(self, data: torch.Tensor): - weight, perm = self._params + weight, perm, _ = self._params nsamples, *dims = data.shape if len(weight) != nsamples: raise ValueError(f"Expected batch of size: {len(weight)}, but got {nsamples}") @@ -80,11 +85,21 @@ def apply(self, data: torch.Tensor): mixweight = weight[(Ellipsis,) + (None,) * len(dims)] return mixweight * data + (1 - mixweight) * data[perm, ...] - def __call__(self, data: torch.Tensor, labels: torch.Tensor | None = None): - self.randomize() + def __call__(self, data: torch.Tensor, labels: torch.Tensor | None = None, randomize=True): + data_t = convert_to_tensor(data, track_meta=get_track_meta()) + labels_t = data_t # placeholder, overwritten below when labels is provided; needed to satisfy pylint/mypy + + if labels is not None: + labels_t = convert_to_tensor(labels, track_meta=get_track_meta()) + if randomize: + self.randomize() if labels is None: - return self.apply(data) - return self.apply(data), self.apply(labels) + return convert_to_dst_type(self.apply(data_t), dst=data)[0] + + return ( + convert_to_dst_type(self.apply(data_t), dst=data)[0], + convert_to_dst_type(self.apply(labels_t), dst=labels)[0], + ) class CutMix(Mixer): @@ -113,14 +128,13 @@ class CutMix(Mixer): """ def apply(self, data: torch.Tensor): - weights, perm = self._params + weights, perm, coords = self._params nsamples, _, *dims = data.shape if len(weights) != nsamples: raise ValueError(f"Expected batch of size: {len(weights)}, but got {nsamples}") mask = torch.ones_like(data) for s, weight in enumerate(weights): - coords = [torch.randint(0, d, size=(1,)) for d in dims] lengths = [d * sqrt(1 - weight) for d in dims] idx = [slice(None)] + [slice(c, min(ceil(c + ln), d)) for c, ln, d in zip(coords, lengths, dims)] mask[s][idx] = 0 return mask * data + (1 - mask) * data[perm, ...]
def apply_on_labels(self, labels: torch.Tensor): - weights, perm = self._params + weights, perm, _ = self._params nsamples, *dims = labels.shape if len(weights) != nsamples: raise ValueError(f"Expected batch of size: {len(weights)}, but got {nsamples}") @@ -136,10 +150,20 @@ def apply_on_labels(self, labels: torch.Tensor): mixweight = weights[(Ellipsis,) + (None,) * len(dims)] return mixweight * labels + (1 - mixweight) * labels[perm, ...] - def __call__(self, data: torch.Tensor, labels: torch.Tensor | None = None): - self.randomize() - augmented = self.apply(data) - return (augmented, self.apply_on_labels(labels)) if labels is not None else augmented + def __call__(self, data: torch.Tensor, labels: torch.Tensor | None = None, randomize=True): + data_t = convert_to_tensor(data, track_meta=get_track_meta()) + augmented_label = None + + if labels is not None: + labels_t = convert_to_tensor(labels, track_meta=get_track_meta()) + if randomize: + self.randomize(data) + augmented = convert_to_dst_type(self.apply(data_t), dst=data)[0] + + if labels is not None: + augmented_label = convert_to_dst_type(self.apply(labels_t), dst=labels)[0] + + return (augmented, augmented_label) if labels is not None else augmented class CutOut(Mixer): @@ -155,20 +179,21 @@ class CutOut(Mixer): """ def apply(self, data: torch.Tensor): - weights, _ = self._params + weights, _, coords = self._params nsamples, _, *dims = data.shape if len(weights) != nsamples: raise ValueError(f"Expected batch of size: {len(weights)}, but got {nsamples}") mask = torch.ones_like(data) for s, weight in enumerate(weights): - coords = [torch.randint(0, d, size=(1,)) for d in dims] lengths = [d * sqrt(1 - weight) for d in dims] idx = [slice(None)] + [slice(c, min(ceil(c + ln), d)) for c, ln, d in zip(coords, lengths, dims)] mask[s][idx] = 0 return mask * data - def __call__(self, data: torch.Tensor): - self.randomize() - return self.apply(data) + def __call__(self, data: torch.Tensor, randomize=True): + data_t = convert_to_tensor(data, track_meta=get_track_meta()) + if randomize: + self.randomize(data) + return convert_to_dst_type(self.apply(data_t), dst=data)[0] diff --git a/monai/transforms/regularization/dictionary.py b/monai/transforms/regularization/dictionary.py index 373913da99..d8815e47b9 100644 --- a/monai/transforms/regularization/dictionary.py +++ b/monai/transforms/regularization/dictionary.py @@ -11,16 +11,23 @@ from __future__ import annotations +from collections.abc import Hashable + +import numpy as np + from monai.config import KeysCollection +from monai.config.type_definitions import NdarrayOrTensor +from monai.data.meta_obj import get_track_meta +from monai.utils import convert_to_tensor from monai.utils.misc import ensure_tuple -from ..transform import MapTransform +from ..transform import MapTransform, RandomizableTransform from .array import CutMix, CutOut, MixUp __all__ = ["MixUpd", "MixUpD", "MixUpDict", "CutMixd", "CutMixD", "CutMixDict", "CutOutd", "CutOutD", "CutOutDict"] -class MixUpd(MapTransform): +class MixUpd(MapTransform, RandomizableTransform): """ Dictionary-based version :py:class:`monai.transforms.MixUp`. 
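For reference, a minimal usage sketch of the reworked array-level API introduced by the hunks above, assuming `MixUp` is exported from `monai.transforms` as in this release; the shapes and seed are illustrative only:

```python
# Sketch of the post-patch array-level API: one randomize() draws the blending
# weights and permutation, and the same parameters are applied to data and labels.
import torch

from monai.transforms import MixUp

images = torch.rand(4, 1, 32, 32)  # batch of 4 single-channel 2D images
labels = torch.randint(0, 2, (4, 1, 32, 32)).float()

mixup = MixUp(batch_size=4, alpha=1.0)
mixup.set_random_state(seed=0)  # draws now go through self.R, so they are reproducible
mixed_images, mixed_labels = mixup(images, labels)  # labels mixed with the same weights

# randomize=False reuses the previously drawn parameters on another tensor
mixed_again = mixup(torch.rand(4, 1, 32, 32), randomize=False)
```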
@@ -31,18 +38,24 @@ class MixUpd(MapTransform): def __init__( self, keys: KeysCollection, batch_size: int, alpha: float = 1.0, allow_missing_keys: bool = False ) -> None: - super().__init__(keys, allow_missing_keys) + MapTransform.__init__(self, keys, allow_missing_keys) self.mixup = MixUp(batch_size, alpha) + def set_random_state(self, seed: int | None = None, state: np.random.RandomState | None = None) -> MixUpd: + super().set_random_state(seed, state) + self.mixup.set_random_state(seed, state) + return self + def __call__(self, data): - self.mixup.randomize() - result = dict(data) - for k in self.keys: - result[k] = self.mixup.apply(data[k]) - return result + d = dict(data) + # all the keys share the same random state + self.mixup.randomize(None) + for k in self.key_iterator(d): + d[k] = self.mixup(data[k], randomize=False) + return d -class CutMixd(MapTransform): +class CutMixd(MapTransform, RandomizableTransform): """ Dictionary-based version :py:class:`monai.transforms.CutMix`. @@ -63,17 +76,27 @@ def __init__( self.mixer = CutMix(batch_size, alpha) self.label_keys = ensure_tuple(label_keys) if label_keys is not None else [] - def __call__(self, data): - self.mixer.randomize() - result = dict(data) - for k in self.keys: - result[k] = self.mixer.apply(data[k]) - for k in self.label_keys: - result[k] = self.mixer.apply_on_labels(data[k]) - return result - + def set_random_state(self, seed: int | None = None, state: np.random.RandomState | None = None) -> CutMixd: + super().set_random_state(seed, state) + self.mixer.set_random_state(seed, state) + return self -class CutOutd(MapTransform): + def __call__(self, data): + d = dict(data) + first_key: Hashable = self.first_key(d) + if first_key == (): + out: dict[Hashable, NdarrayOrTensor] = convert_to_tensor(d, track_meta=get_track_meta()) + return out + self.mixer.randomize(d[first_key]) + for key, label_key in self.key_iterator(d, self.label_keys): + ret = self.mixer(data[key], data.get(label_key, None), randomize=False) + d[key] = ret[0] + if label_key in d: + d[label_key] = ret[1] + return d + + +class CutOutd(MapTransform, RandomizableTransform): """ Dictionary-based version :py:class:`monai.transforms.CutOut`. 
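A hypothetical sketch of the corresponding dictionary-level call pattern after these hunks; the key names, shapes, and seed are illustrative, and the keyword signature of `CutMixd` is assumed from the constructor shown above:

```python
# Sketch of the post-patch dictionary-level API: the wrapper seeds the inner
# CutMix, and image/label pairs are cut with the same coordinates.
import torch

from monai.transforms import CutMixd

batch = {
    "image": torch.rand(2, 1, 16, 16),
    "label": torch.randint(0, 2, (2, 1, 16, 16)).float(),
}

cutmix = CutMixd(keys="image", batch_size=2, label_keys="label")
cutmix.set_random_state(seed=42)  # propagates to the wrapped CutMix instance
out = cutmix(batch)               # a single randomize() drives every key
```

Routing `randomize=False` through the wrapped mixer is what keeps the cut coordinates identical for an image and its paired label.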
@@ -84,12 +107,21 @@ def __init__(self, keys: KeysCollection, batch_size: int, allow_missing_keys: bo super().__init__(keys, allow_missing_keys) self.cutout = CutOut(batch_size) + def set_random_state(self, seed: int | None = None, state: np.random.RandomState | None = None) -> CutOutd: + super().set_random_state(seed, state) + self.cutout.set_random_state(seed, state) + return self + def __call__(self, data): - result = dict(data) - self.cutout.randomize() - for k in self.keys: - result[k] = self.cutout(data[k]) - return result + d = dict(data) + first_key: Hashable = self.first_key(d) + if first_key == (): + out: dict[Hashable, NdarrayOrTensor] = convert_to_tensor(d, track_meta=get_track_meta()) + return out + self.cutout.randomize(d[first_key]) + for k in self.key_iterator(d): + d[k] = self.cutout(data[k], randomize=False) + return d MixUpD = MixUpDict = MixUpd diff --git a/monai/transforms/utils.py index 560dbac346..d8461d927b 100644 --- a/monai/transforms/utils.py +++ b/monai/transforms/utils.py @@ -108,6 +108,7 @@ "in_bounds", "is_empty", "is_positive", + "map_and_generate_sampling_centers", "map_binary_to_indices", "map_classes_to_indices", "map_spatial_axes", @@ -368,6 +369,70 @@ def check_non_lazy_pending_ops( warnings.warn(msg) +def map_and_generate_sampling_centers( + label: NdarrayOrTensor, + spatial_size: Sequence[int] | int, + num_samples: int, + label_spatial_shape: Sequence[int] | None = None, + num_classes: int | None = None, + image: NdarrayOrTensor | None = None, + image_threshold: float = 0.0, + max_samples_per_class: int | None = None, + ratios: list[float | int] | None = None, + rand_state: np.random.RandomState | None = None, + allow_smaller: bool = False, + warn: bool = True, +) -> tuple[tuple]: + """ + Combine "map_classes_to_indices" and "generate_label_classes_crop_centers" functions, return crop center coordinates. + This calls `map_classes_to_indices` to get indices from `label`, gets the shape from `label_spatial_shape` + if given, otherwise from the label itself, then calls `generate_label_classes_crop_centers` and returns its results. + + Args: + label: use the label data to get the indices of every class. + spatial_size: spatial size of the ROIs to be sampled. + num_samples: total sample centers to be generated. + label_spatial_shape: spatial shape of the original label data to unravel selected centers. + num_classes: number of classes for argmax label, not necessary for One-Hot label. + image: if image is not None, only return the indices of every class that are within the valid + region of the image (``image > image_threshold``). + image_threshold: if `image` is provided, use ``image > image_threshold`` to + determine the valid image content area and select class indices only in this area. + max_samples_per_class: maximum length of indices in each class to reduce memory consumption. + Default is None, no subsampling. + ratios: ratios of every class in the label to generate crop centers, including background class. + if None, every class will have the same ratio to generate crop centers. + rand_state: numpy randomState object to align with other modules. + allow_smaller: if `False`, an exception will be raised if the image is smaller than + the requested ROI in any dimension. If `True`, any smaller dimensions will be set to + match the cropped size (i.e., no cropping in that dimension).
+ warn: if `True`, prints a warning if a class is not present in the label. + Returns: + Tuple of crop centers + """ + if label is None: + raise ValueError("label must not be None.") + indices = map_classes_to_indices(label, num_classes, image, image_threshold, max_samples_per_class) + + if label_spatial_shape is not None: + _shape = label_spatial_shape + elif isinstance(label, monai.data.MetaTensor): + _shape = label.peek_pending_shape() + else: + _shape = label.shape[1:] + + if _shape is None: + raise ValueError( + "label_spatial_shape or label with a known shape must be provided to infer the output spatial shape." + ) + centers = generate_label_classes_crop_centers( + spatial_size, num_samples, _shape, indices, ratios, rand_state, allow_smaller, warn + ) + + return ensure_tuple(centers) + + def map_binary_to_indices( label: NdarrayOrTensor, image: NdarrayOrTensor | None = None, image_threshold: float = 0.0 ) -> tuple[NdarrayOrTensor, NdarrayOrTensor]: diff --git a/monai/transforms/utils_create_transform_ims.py index 4b5990abd3..a29fd4dbf9 100644 --- a/monai/transforms/utils_create_transform_ims.py +++ b/monai/transforms/utils_create_transform_ims.py @@ -269,11 +269,9 @@ def update_docstring(code_path, transform_name): def pre_process_data(data, ndim, is_map, is_post): - """If transform requires 2D data, then convert to 2D""" + """If transform requires 2D data, then convert to 2D by selecting the middle of the last dimension.""" if ndim == 2: - for k in keys: - data[k] = data[k][..., data[k].shape[-1] // 2] - + data = {k: v[..., v.shape[-1] // 2] for k, v in data.items()} if is_map: return data return data[CommonKeys.LABEL] if is_post else data[CommonKeys.IMAGE] diff --git a/requirements-dev.txt index 35ff3382be..c50d9248df 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -33,8 +33,8 @@ tifffile; platform_system == "Linux" or platform_system == "Darwin" pandas requests einops -transformers>=4.36.0 -mlflow>=1.28.0, <=2.11.3 +transformers>=4.36.0, <4.41.0; python_version <= '3.10' +mlflow>=2.12.2 clearml>=1.10.0rc0 matplotlib>=3.6.3 tensorboardX diff --git a/setup.cfg index c90b043c1c..7b82784a8a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -65,8 +65,8 @@ all = imagecodecs pandas einops - transformers<4.22; python_version <= '3.10' - mlflow>=1.28.0, <=2.11.3 + transformers>=4.36.0, <4.41.0; python_version <= '3.10' + mlflow>=2.12.2 clearml>=1.10.0rc0 matplotlib>=3.6.3 tensorboardX @@ -123,9 +123,9 @@ pandas = einops = einops transformers = - transformers<4.22; python_version <= '3.10' + transformers>=4.36.0, <4.41.0; python_version <= '3.10' mlflow = - mlflow>=1.28.0, <=2.11.3 + mlflow>=2.12.2 matplotlib = matplotlib>=3.6.3 clearml = diff --git a/tests/hvd_evenly_divisible_all_gather.py index 78c6ca06bc..732ad13b83 100644 --- a/tests/hvd_evenly_divisible_all_gather.py +++ b/tests/hvd_evenly_divisible_all_gather.py @@ -30,10 +30,10 @@ def test_data(self): self._run() def _run(self): - if hvd.rank() == 0: - data1 = torch.tensor([[1, 2], [3, 4]]) - data2 = torch.tensor([[1.0, 2.0]]) - data3 = torch.tensor(7) + # if hvd.rank() == 0: + data1 = torch.tensor([[1, 2], [3, 4]]) + data2 = torch.tensor([[1.0, 2.0]]) + data3 = torch.tensor(7) if hvd.rank() == 1: data1 = torch.tensor([[5, 6]]) diff --git a/tests/test_arraydataset.py index efc014a267..b61b3c139c 100644 --- a/tests/test_arraydataset.py +++ b/tests/test_arraydataset.py @@
-41,7 +41,7 @@ class TestCompose(Compose): - def __call__(self, input_, lazy): + def __call__(self, input_, lazy=False): img = self.transforms[0](input_) metadata = img.meta img = self.transforms[1](img) diff --git a/tests/test_clip_intensity_percentiles.py index af157446f6..77f811db87 100644 --- a/tests/test_clip_intensity_percentiles.py +++ b/tests/test_clip_intensity_percentiles.py @@ -18,9 +18,32 @@ from monai.transforms import ClipIntensityPercentiles from monai.transforms.utils import soft_clip from monai.transforms.utils_pytorch_numpy_unification import clip, percentile +from monai.utils.type_conversion import convert_to_tensor from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, NumpyImageTestCase3D, assert_allclose +def test_hard_clip_func(im, lower, upper): + im_t = convert_to_tensor(im) + if lower is None: + upper = percentile(im_t, upper) + elif upper is None: + lower = percentile(im_t, lower) + else: + lower, upper = percentile(im_t, (lower, upper)) + return clip(im_t, lower, upper) + + +def test_soft_clip_func(im, lower, upper): + im_t = convert_to_tensor(im) + if lower is None: + upper = percentile(im_t, upper) + elif upper is None: + lower = percentile(im_t, lower) + else: + lower, upper = percentile(im_t, (lower, upper)) + return soft_clip(im_t, minv=lower, maxv=upper, sharpness_factor=1.0, dtype=torch.float32) + + class TestClipIntensityPercentiles2D(NumpyImageTestCase2D): @parameterized.expand([[p] for p in TEST_NDARRAYS]) @@ -28,8 +51,7 @@ def test_hard_clipping_two_sided(self, p): hard_clipper = ClipIntensityPercentiles(upper=95, lower=5) im = p(self.imt) result = hard_clipper(im) - lower, upper = percentile(im, (5, 95)) - expected = clip(im, lower, upper) + expected = test_hard_clip_func(im, 5, 95) assert_allclose(result, p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in TEST_NDARRAYS]) @@ -37,8 +59,7 @@ def test_hard_clipping_one_sided_high(self, p): hard_clipper = ClipIntensityPercentiles(upper=95, lower=None) im = p(self.imt) result = hard_clipper(im) - lower, upper = percentile(im, (0, 95)) - expected = clip(im, lower, upper) + expected = test_hard_clip_func(im, 0, 95) assert_allclose(result, p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in TEST_NDARRAYS]) @@ -46,8 +67,7 @@ def test_hard_clipping_one_sided_low(self, p): hard_clipper = ClipIntensityPercentiles(upper=None, lower=5) im = p(self.imt) result = hard_clipper(im) - lower, upper = percentile(im, (5, 100)) - expected = clip(im, lower, upper) + expected = test_hard_clip_func(im, 5, 100) assert_allclose(result, p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in TEST_NDARRAYS]) @@ -55,37 +75,35 @@ def test_soft_clipping_two_sided(self, p): soft_clipper = ClipIntensityPercentiles(upper=95, lower=5, sharpness_factor=1.0) im = p(self.imt) result = soft_clipper(im) - lower, upper = percentile(im, (5, 95)) - expected = soft_clip(im, sharpness_factor=1.0, minv=lower, maxv=upper, dtype=torch.float32) - # the rtol is set to 1e-6 because the logaddexp function used in softplus is not stable accross torch and numpy - assert_allclose(result, p(expected), type_test="tensor", rtol=1e-6, atol=0) + expected = test_soft_clip_func(im, 5, 95) + # the rtol is set to 1e-4 because the logaddexp function used in softplus is not stable across torch and numpy + assert_allclose(result, p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in
TEST_NDARRAYS]) def test_soft_clipping_one_sided_high(self, p): soft_clipper = ClipIntensityPercentiles(upper=95, lower=None, sharpness_factor=1.0) im = p(self.imt) result = soft_clipper(im) - upper = percentile(im, 95) - expected = soft_clip(im, sharpness_factor=1.0, minv=None, maxv=upper, dtype=torch.float32) - # the rtol is set to 5e-5 because the logaddexp function used in softplus is not stable accross torch and numpy - assert_allclose(result, p(expected), type_test="tensor", rtol=5e-5, atol=0) + expected = test_soft_clip_func(im, None, 95) + # the rtol is set to 1e-4 because the logaddexp function used in softplus is not stable across torch and numpy + assert_allclose(result, p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in TEST_NDARRAYS]) def test_soft_clipping_one_sided_low(self, p): soft_clipper = ClipIntensityPercentiles(upper=None, lower=5, sharpness_factor=1.0) im = p(self.imt) result = soft_clipper(im) - lower = percentile(im, 5) - expected = soft_clip(im, sharpness_factor=1.0, minv=lower, maxv=None, dtype=torch.float32) - # the rtol is set to 1e-6 because the logaddexp function used in softplus is not stable accross torch and numpy - assert_allclose(result, p(expected), type_test="tensor", rtol=1e-6, atol=0) + expected = test_soft_clip_func(im, 5, None) + # the rtol is set to 1e-4 because the logaddexp function used in softplus is not stable across torch and numpy + assert_allclose(result, p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in TEST_NDARRAYS]) def test_channel_wise(self, p): clipper = ClipIntensityPercentiles(upper=95, lower=5, channel_wise=True) im = p(self.imt) result = clipper(im) - for i, c in enumerate(im): + im_t = convert_to_tensor(self.imt) + for i, c in enumerate(im_t): lower, upper = percentile(c, (5, 95)) expected = clip(c, lower, upper) assert_allclose(result[i], p(expected), type_test="tensor", rtol=1e-4, atol=0) @@ -118,8 +136,7 @@ def test_hard_clipping_two_sided(self, p): hard_clipper = ClipIntensityPercentiles(upper=95, lower=5) im = p(self.imt) result = hard_clipper(im) - lower, upper = percentile(im, (5, 95)) - expected = clip(im, lower, upper) + expected = test_hard_clip_func(im, 5, 95) assert_allclose(result, p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in TEST_NDARRAYS]) @@ -127,8 +144,7 @@ def test_hard_clipping_one_sided_high(self, p): hard_clipper = ClipIntensityPercentiles(upper=95, lower=None) im = p(self.imt) result = hard_clipper(im) - lower, upper = percentile(im, (0, 95)) - expected = clip(im, lower, upper) + expected = test_hard_clip_func(im, 0, 95) assert_allclose(result, p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in TEST_NDARRAYS]) @@ -136,8 +152,7 @@ def test_hard_clipping_one_sided_low(self, p): hard_clipper = ClipIntensityPercentiles(upper=None, lower=5) im = p(self.imt) result = hard_clipper(im) - lower, upper = percentile(im, (5, 100)) - expected = clip(im, lower, upper) + expected = test_hard_clip_func(im, 5, 100) assert_allclose(result, p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in TEST_NDARRAYS]) @@ -145,37 +160,35 @@ def test_soft_clipping_two_sided(self, p): soft_clipper = ClipIntensityPercentiles(upper=95, lower=5, sharpness_factor=1.0) im = p(self.imt) result = soft_clipper(im) - lower, upper = percentile(im, (5, 95)) - expected = soft_clip(im, sharpness_factor=1.0, minv=lower, maxv=upper, dtype=torch.float32) - # the
rtol is set to 1e-6 because the logaddexp function used in softplus is not stable accross torch and numpy - assert_allclose(result, p(expected), type_test="tensor", rtol=1e-6, atol=0) + expected = test_soft_clip_func(im, 5, 95) + # the rtol is set to 1e-4 because the logaddexp function used in softplus is not stable across torch and numpy + assert_allclose(result, p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in TEST_NDARRAYS]) def test_soft_clipping_one_sided_high(self, p): soft_clipper = ClipIntensityPercentiles(upper=95, lower=None, sharpness_factor=1.0) im = p(self.imt) result = soft_clipper(im) - upper = percentile(im, 95) - expected = soft_clip(im, sharpness_factor=1.0, minv=None, maxv=upper, dtype=torch.float32) - # the rtol is set to 5e-5 because the logaddexp function used in softplus is not stable accross torch and numpy - assert_allclose(result, p(expected), type_test="tensor", rtol=5e-5, atol=0) + expected = test_soft_clip_func(im, None, 95) + # the rtol is set to 1e-4 because the logaddexp function used in softplus is not stable across torch and numpy + assert_allclose(result, p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in TEST_NDARRAYS]) def test_soft_clipping_one_sided_low(self, p): soft_clipper = ClipIntensityPercentiles(upper=None, lower=5, sharpness_factor=1.0) im = p(self.imt) result = soft_clipper(im) - lower = percentile(im, 5) - expected = soft_clip(im, sharpness_factor=1.0, minv=lower, maxv=None, dtype=torch.float32) - # the rtol is set to 1e-6 because the logaddexp function used in softplus is not stable accross torch and numpy - assert_allclose(result, p(expected), type_test="tensor", rtol=1e-6, atol=0) + expected = test_soft_clip_func(im, 5, None) + # the rtol is set to 1e-4 because the logaddexp function used in softplus is not stable across torch and numpy + assert_allclose(result, p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in TEST_NDARRAYS]) def test_channel_wise(self, p): clipper = ClipIntensityPercentiles(upper=95, lower=5, channel_wise=True) im = p(self.imt) result = clipper(im) - for i, c in enumerate(im): + im_t = convert_to_tensor(self.imt) + for i, c in enumerate(im_t): lower, upper = percentile(c, (5, 95)) expected = clip(c, lower, upper) assert_allclose(result[i], p(expected), type_test="tensor", rtol=1e-4, atol=0) diff --git a/tests/test_clip_intensity_percentilesd.py index ed4fc588cb..3e06b18418 100644 --- a/tests/test_clip_intensity_percentilesd.py +++ b/tests/test_clip_intensity_percentilesd.py @@ -13,14 +13,15 @@ import unittest -import torch from parameterized import parameterized from monai.transforms import ClipIntensityPercentilesd -from monai.transforms.utils import soft_clip from monai.transforms.utils_pytorch_numpy_unification import clip, percentile +from monai.utils.type_conversion import convert_to_tensor from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, NumpyImageTestCase3D, assert_allclose +from .test_clip_intensity_percentiles import test_hard_clip_func, test_soft_clip_func + class TestClipIntensityPercentilesd2D(NumpyImageTestCase2D): @@ -30,8 +31,7 @@ def test_hard_clipping_two_sided(self, p): hard_clipper = ClipIntensityPercentilesd(keys=[key], upper=95, lower=5) im = p(self.imt) result = hard_clipper({key: im}) - lower, upper = percentile(im, (5, 95)) - expected = clip(im, lower, upper) + expected = test_hard_clip_func(im, 5, 95)
assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in TEST_NDARRAYS]) @@ -40,8 +40,7 @@ def test_hard_clipping_one_sided_high(self, p): hard_clipper = ClipIntensityPercentilesd(keys=[key], upper=95, lower=None) im = p(self.imt) result = hard_clipper({key: im}) - lower, upper = percentile(im, (0, 95)) - expected = clip(im, lower, upper) + expected = test_hard_clip_func(im, 0, 95) assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in TEST_NDARRAYS]) @@ -50,8 +49,7 @@ def test_hard_clipping_one_sided_low(self, p): hard_clipper = ClipIntensityPercentilesd(keys=[key], upper=None, lower=5) im = p(self.imt) result = hard_clipper({key: im}) - lower, upper = percentile(im, (5, 100)) - expected = clip(im, lower, upper) + expected = test_hard_clip_func(im, 5, 100) assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in TEST_NDARRAYS]) @@ -60,10 +58,9 @@ def test_soft_clipping_two_sided(self, p): soft_clipper = ClipIntensityPercentilesd(keys=[key], upper=95, lower=5, sharpness_factor=1.0) im = p(self.imt) result = soft_clipper({key: im}) - lower, upper = percentile(im, (5, 95)) - expected = soft_clip(im, sharpness_factor=1.0, minv=lower, maxv=upper, dtype=torch.float32) - # the rtol is set to 1e-6 because the logaddexp function used in softplus is not stable accross torch and numpy - assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-6, atol=0) + expected = test_soft_clip_func(im, 5, 95) + # the rtol is set to 1e-4 because the logaddexp function used in softplus is not stable accross torch and numpy + assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in TEST_NDARRAYS]) def test_soft_clipping_one_sided_high(self, p): @@ -71,10 +68,9 @@ def test_soft_clipping_one_sided_high(self, p): soft_clipper = ClipIntensityPercentilesd(keys=[key], upper=95, lower=None, sharpness_factor=1.0) im = p(self.imt) result = soft_clipper({key: im}) - upper = percentile(im, 95) - expected = soft_clip(im, sharpness_factor=1.0, minv=None, maxv=upper, dtype=torch.float32) - # the rtol is set to 5e-5 because the logaddexp function used in softplus is not stable accross torch and numpy - assert_allclose(result[key], p(expected), type_test="tensor", rtol=5e-5, atol=0) + expected = test_soft_clip_func(im, None, 95) + # the rtol is set to 1e-4 because the logaddexp function used in softplus is not stable accross torch and numpy + assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in TEST_NDARRAYS]) def test_soft_clipping_one_sided_low(self, p): @@ -82,10 +78,9 @@ def test_soft_clipping_one_sided_low(self, p): soft_clipper = ClipIntensityPercentilesd(keys=[key], upper=None, lower=5, sharpness_factor=1.0) im = p(self.imt) result = soft_clipper({key: im}) - lower = percentile(im, 5) - expected = soft_clip(im, sharpness_factor=1.0, minv=lower, maxv=None, dtype=torch.float32) - # the rtol is set to 1e-6 because the logaddexp function used in softplus is not stable accross torch and numpy - assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-6, atol=0) + expected = test_soft_clip_func(im, 5, None) + # the rtol is set to 1e-4 because the logaddexp function used in softplus is not stable accross torch and numpy + assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-4, atol=0) 
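
The refactor above replaces the per-test percentile/soft_clip calls with the shared test_hard_clip_func/test_soft_clip_func helpers, which convert the input to a tensor before computing expected values; with torch on both sides of the comparison, a single rtol of 1e-4 covers the remaining numpy/torch logaddexp drift. For readers unfamiliar with soft clipping, here is a minimal numpy sketch of the softplus idea the comments refer to; it is an illustration only, not MONAI's exact soft_clip implementation:

import numpy as np

def softplus(x, sharpness=1.0):
    # log(1 + exp(sharpness * x)) / sharpness, via logaddexp for numerical stability
    return np.logaddexp(0.0, sharpness * np.asarray(x, dtype=float)) / sharpness

def soft_clip_sketch(x, minv=None, maxv=None, sharpness_factor=1.0):
    # Smooth stand-in for clip(x, minv, maxv): softplus rounds off the hard
    # corner at each bound; a larger sharpness_factor approaches a hard clip.
    v = np.asarray(x, dtype=float)
    if minv is not None:
        v = minv + softplus(v - minv, sharpness_factor)
    if maxv is not None:
        v = v - softplus(v - maxv, sharpness_factor)
    return v

print(soft_clip_sketch(np.linspace(-3.0, 3.0, 7), minv=-1.0, maxv=1.0))
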
@parameterized.expand([[p] for p in TEST_NDARRAYS]) def test_channel_wise(self, p): @@ -93,7 +88,8 @@ def test_channel_wise(self, p): clipper = ClipIntensityPercentilesd(keys=[key], upper=95, lower=5, channel_wise=True) im = p(self.imt) result = clipper({key: im}) - for i, c in enumerate(im): + im_t = convert_to_tensor(self.imt) + for i, c in enumerate(im_t): lower, upper = percentile(c, (5, 95)) expected = clip(c, lower, upper) assert_allclose(result[key][i], p(expected), type_test="tensor", rtol=1e-3, atol=0) @@ -132,8 +128,7 @@ def test_hard_clipping_two_sided(self, p): hard_clipper = ClipIntensityPercentilesd(keys=[key], upper=95, lower=5) im = p(self.imt) result = hard_clipper({key: im}) - lower, upper = percentile(im, (5, 95)) - expected = clip(im, lower, upper) + expected = test_hard_clip_func(im, 5, 95) assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in TEST_NDARRAYS]) @@ -142,8 +137,7 @@ def test_hard_clipping_one_sided_high(self, p): hard_clipper = ClipIntensityPercentilesd(keys=[key], upper=95, lower=None) im = p(self.imt) result = hard_clipper({key: im}) - lower, upper = percentile(im, (0, 95)) - expected = clip(im, lower, upper) + expected = test_hard_clip_func(im, 0, 95) assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in TEST_NDARRAYS]) @@ -152,8 +146,7 @@ def test_hard_clipping_one_sided_low(self, p): hard_clipper = ClipIntensityPercentilesd(keys=[key], upper=None, lower=5) im = p(self.imt) result = hard_clipper({key: im}) - lower, upper = percentile(im, (5, 100)) - expected = clip(im, lower, upper) + expected = test_hard_clip_func(im, 5, 100) assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in TEST_NDARRAYS]) @@ -162,10 +155,9 @@ def test_soft_clipping_two_sided(self, p): soft_clipper = ClipIntensityPercentilesd(keys=[key], upper=95, lower=5, sharpness_factor=1.0) im = p(self.imt) result = soft_clipper({key: im}) - lower, upper = percentile(im, (5, 95)) - expected = soft_clip(im, sharpness_factor=1.0, minv=lower, maxv=upper, dtype=torch.float32) - # the rtol is set to 1e-6 because the logaddexp function used in softplus is not stable accross torch and numpy - assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-6, atol=0) + expected = test_soft_clip_func(im, 5, 95) + # the rtol is set to 1e-4 because the logaddexp function used in softplus is not stable accross torch and numpy + assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in TEST_NDARRAYS]) def test_soft_clipping_one_sided_high(self, p): @@ -173,10 +165,9 @@ def test_soft_clipping_one_sided_high(self, p): soft_clipper = ClipIntensityPercentilesd(keys=[key], upper=95, lower=None, sharpness_factor=1.0) im = p(self.imt) result = soft_clipper({key: im}) - upper = percentile(im, 95) - expected = soft_clip(im, sharpness_factor=1.0, minv=None, maxv=upper, dtype=torch.float32) - # the rtol is set to 5e-5 because the logaddexp function used in softplus is not stable accross torch and numpy - assert_allclose(result[key], p(expected), type_test="tensor", rtol=5e-5, atol=0) + expected = test_soft_clip_func(im, None, 95) + # the rtol is set to 1e-4 because the logaddexp function used in softplus is not stable accross torch and numpy + assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in 
TEST_NDARRAYS]) def test_soft_clipping_one_sided_low(self, p): @@ -184,10 +175,9 @@ def test_soft_clipping_one_sided_low(self, p): soft_clipper = ClipIntensityPercentilesd(keys=[key], upper=None, lower=5, sharpness_factor=1.0) im = p(self.imt) result = soft_clipper({key: im}) - lower = percentile(im, 5) - expected = soft_clip(im, sharpness_factor=1.0, minv=lower, maxv=None, dtype=torch.float32) - # the rtol is set to 1e-6 because the logaddexp function used in softplus is not stable accross torch and numpy - assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-6, atol=0) + expected = test_soft_clip_func(im, 5, None) + # the rtol is set to 1e-4 because the logaddexp function used in softplus is not stable across torch and numpy + assert_allclose(result[key], p(expected), type_test="tensor", rtol=1e-4, atol=0) @parameterized.expand([[p] for p in TEST_NDARRAYS]) def test_channel_wise(self, p): @@ -195,7 +185,8 @@ def test_channel_wise(self, p): clipper = ClipIntensityPercentilesd(keys=[key], upper=95, lower=5, channel_wise=True) im = p(self.imt) result = clipper({key: im}) - for i, c in enumerate(im): + im_t = convert_to_tensor(im) + for i, c in enumerate(im_t): lower, upper = percentile(c, (5, 95)) expected = clip(c, lower, upper) assert_allclose(result[key][i], p(expected), type_test="tensor", rtol=1e-4, atol=0) diff --git a/tests/test_controlnet_inferers.py b/tests/test_controlnet_inferers.py index 96e707acb5..e3b0aeb5a2 100644 --- a/tests/test_controlnet_inferers.py +++ b/tests/test_controlnet_inferers.py @@ -663,6 +663,8 @@ def test_prediction_shape( input_shape, latent_shape, ): + stage_1 = None + if ae_model_type == "AutoencoderKL": stage_1 = AutoencoderKL(**autoencoder_params) if ae_model_type == "VQVAE": @@ -730,6 +732,8 @@ def test_sample_shape( input_shape, latent_shape, ): + stage_1 = None + if ae_model_type == "AutoencoderKL": stage_1 = AutoencoderKL(**autoencoder_params) if ae_model_type == "VQVAE": @@ -793,6 +797,8 @@ def test_sample_intermediates( input_shape, latent_shape, ): + stage_1 = None + if ae_model_type == "AutoencoderKL": stage_1 = AutoencoderKL(**autoencoder_params) if ae_model_type == "VQVAE": @@ -835,6 +841,10 @@ def test_sample_intermediates( controlnet=controlnet, cn_cond=mask, ) + + # TODO: this isn't correct; should the above produce intermediates as well? + # This test has always passed, so is this branch not being used?
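
A pattern recurs throughout this changeset: the stage_1 = None lines above, the intermediates = None guard immediately below, and the later model = None, net = None, and init_loss = None additions all bind a name before branch-only assignments, so static checkers stop flagging used-before-assignment and an unmatched branch can fail loudly instead of raising NameError at first use. A hypothetical sketch of the idea; the tuple stand-ins are illustrative, not the real model constructors:

def build_stage_1(ae_model_type, **autoencoder_params):
    stage_1 = None  # bound on every path, even when no branch matches
    if ae_model_type == "AutoencoderKL":
        stage_1 = ("AutoencoderKL", autoencoder_params)  # stand-in for AutoencoderKL(**autoencoder_params)
    if ae_model_type == "VQVAE":
        stage_1 = ("VQVAE", autoencoder_params)  # stand-in for VQVAE(**autoencoder_params)
    if stage_1 is None:
        raise ValueError(f"unsupported ae_model_type: {ae_model_type!r}")
    return stage_1
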
+ intermediates = None else: sample, intermediates = inferer.sample( input_noise=noise, @@ -846,6 +856,7 @@ def test_sample_intermediates( controlnet=controlnet, cn_cond=mask, ) + self.assertEqual(len(intermediates), 10) self.assertEqual(intermediates[0].shape, input_shape) @@ -861,6 +872,8 @@ def test_get_likelihoods( input_shape, latent_shape, ): + stage_1 = None + if ae_model_type == "AutoencoderKL": stage_1 = AutoencoderKL(**autoencoder_params) if ae_model_type == "VQVAE": @@ -929,6 +942,8 @@ def test_resample_likelihoods( input_shape, latent_shape, ): + stage_1 = None + if ae_model_type == "AutoencoderKL": stage_1 = AutoencoderKL(**autoencoder_params) if ae_model_type == "VQVAE": @@ -999,6 +1014,8 @@ def test_prediction_shape_conditioned_concat( input_shape, latent_shape, ): + stage_1 = None + if ae_model_type == "AutoencoderKL": stage_1 = AutoencoderKL(**autoencoder_params) if ae_model_type == "VQVAE": @@ -1080,6 +1097,8 @@ def test_sample_shape_conditioned_concat( input_shape, latent_shape, ): + stage_1 = None + if ae_model_type == "AutoencoderKL": stage_1 = AutoencoderKL(**autoencoder_params) if ae_model_type == "VQVAE": @@ -1156,6 +1175,8 @@ def test_sample_shape_different_latents( input_shape, latent_shape, ): + stage_1 = None + if ae_model_type == "AutoencoderKL": stage_1 = AutoencoderKL(**autoencoder_params) if ae_model_type == "VQVAE": diff --git a/tests/test_dataset.py b/tests/test_dataset.py index 1398009c63..0d37ae2efd 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -23,7 +23,7 @@ from parameterized import parameterized from monai.data import Dataset -from monai.transforms import Compose, LoadImaged, SimulateDelayd +from monai.transforms import Compose, Lambda, LoadImage, LoadImaged, SimulateDelay, SimulateDelayd from tests.test_compose import TEST_COMPOSE_LAZY_ON_CALL_LOGGING_TEST_CASES, data_from_keys TEST_CASE_1 = [(128, 128, 128)] @@ -99,6 +99,72 @@ def test_dataset_lazy_on_call(self): data[0, 0:2, 0:2] = 1 +class TestTupleDataset(unittest.TestCase): + + @parameterized.expand([TEST_CASE_1]) + def test_shape(self, expected_shape): + test_image = nib.Nifti1Image(np.random.randint(0, 2, size=[128, 128, 128]).astype(float), np.eye(4)) + with tempfile.TemporaryDirectory() as tempdir: + nib.save(test_image, os.path.join(tempdir, "test_image1.nii.gz")) + nib.save(test_image, os.path.join(tempdir, "test_label1.nii.gz")) + nib.save(test_image, os.path.join(tempdir, "test_image2.nii.gz")) + nib.save(test_image, os.path.join(tempdir, "test_label2.nii.gz")) + test_data = [ + (os.path.join(tempdir, "test_image1.nii.gz"), os.path.join(tempdir, "test_label1.nii.gz")), + (os.path.join(tempdir, "test_image2.nii.gz"), os.path.join(tempdir, "test_label2.nii.gz")), + ] + + test_transform = Compose([LoadImage(), SimulateDelay(delay_time=1e-5)]) + + # Here test_transform is applied element by element for the tuple. + dataset = Dataset(data=test_data, transform=test_transform) + data1 = dataset[0] + data2 = dataset[1] + + # Output is a list/tuple + self.assertTrue(isinstance(data1, (list, tuple))) + self.assertTrue(isinstance(data2, (list, tuple))) + + # Number of elements are 2 + self.assertEqual(len(data1), 2) + self.assertEqual(len(data2), 2) + + # Output shapes are as expected + self.assertTupleEqual(data1[0].shape, expected_shape) + self.assertTupleEqual(data1[1].shape, expected_shape) + self.assertTupleEqual(data2[0].shape, expected_shape) + self.assertTupleEqual(data2[1].shape, expected_shape) + + # Here test_transform is applied to the tuple as a whole. 
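
The variant below relies on Compose's map_items flag: by default a transform chain is applied element by element when the input is a list or tuple (as exercised by the first half of this test), while map_items=False hands the whole container to each transform. A minimal sketch of the two modes using Lambda so it runs without image files; the paths are hypothetical strings, and it assumes Lambda forwards arbitrary Python objects unchanged:

from monai.transforms import Compose, Lambda

pair = ("image1.nii.gz", "label1.nii.gz")  # hypothetical paths, treated as plain strings

per_item = Compose([Lambda(func=lambda x: "loaded:" + x)])
print(per_item(pair))  # applied element-wise -> ['loaded:image1.nii.gz', 'loaded:label1.nii.gz']

as_whole = Compose([Lambda(func=lambda x: (x[1], x[0]))], map_items=False)
print(as_whole(pair))  # applied to the tuple itself -> ('label1.nii.gz', 'image1.nii.gz')
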
+ test_transform = Compose( + [ + # LoadImage creates a channel-stacked image when applied to a tuple + LoadImage(), + # Get the channel-stacked image and the label + Lambda(func=lambda x: (x[0].permute(2, 1, 0), x[1])), + ], + map_items=False, + ) + + dataset = Dataset(data=test_data, transform=test_transform) + data1 = dataset[0] + data2 = dataset[1] + + # Output is a list/tuple + self.assertTrue(isinstance(data1, (list, tuple))) + self.assertTrue(isinstance(data2, (list, tuple))) + + # Number of elements are 2 + self.assertEqual(len(data1), 2) + self.assertEqual(len(data2), 2) + + # Output shapes are as expected + self.assertTupleEqual(data1[0].shape, expected_shape) + self.assertTupleEqual(data1[1].shape, expected_shape) + self.assertTupleEqual(data2[0].shape, expected_shape) + self.assertTupleEqual(data2[1].shape, expected_shape) + + class TestDatsesetWithLazy(unittest.TestCase): LOGGER_NAME = "a_logger_name" diff --git a/tests/test_ensure_channel_first.py b/tests/test_ensure_channel_first.py index 0c9ad5869e..fe046a4cdf 100644 --- a/tests/test_ensure_channel_first.py +++ b/tests/test_ensure_channel_first.py @@ -50,9 +50,10 @@ class TestEnsureChannelFirst(unittest.TestCase): @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5, TEST_CASE_6]) @unittest.skipUnless(has_itk, "itk not installed") def test_load_nifti(self, input_param, filenames, original_channel_dim): - if original_channel_dim is None: - test_image = np.random.rand(8, 8, 8) - elif original_channel_dim == -1: + # if original_channel_dim is None + test_image = np.random.rand(8, 8, 8) + + if original_channel_dim == -1: test_image = np.random.rand(8, 8, 8, 1) with tempfile.TemporaryDirectory() as tempdir: diff --git a/tests/test_ensure_channel_firstd.py b/tests/test_ensure_channel_firstd.py index 63a437894b..e9effad951 100644 --- a/tests/test_ensure_channel_firstd.py +++ b/tests/test_ensure_channel_firstd.py @@ -35,9 +35,10 @@ class TestEnsureChannelFirstd(unittest.TestCase): @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]) def test_load_nifti(self, input_param, filenames, original_channel_dim): - if original_channel_dim is None: - test_image = np.random.rand(8, 8, 8) - elif original_channel_dim == -1: + # if original_channel_dim is None: + test_image = np.random.rand(8, 8, 8) + + if original_channel_dim == -1: test_image = np.random.rand(8, 8, 8, 1) with tempfile.TemporaryDirectory() as tempdir: diff --git a/tests/test_evenly_divisible_all_gather_dist.py b/tests/test_evenly_divisible_all_gather_dist.py index d6d26c7e23..f1d45ba48f 100644 --- a/tests/test_evenly_divisible_all_gather_dist.py +++ b/tests/test_evenly_divisible_all_gather_dist.py @@ -27,10 +27,10 @@ def test_data(self): self._run() def _run(self): - if dist.get_rank() == 0: - data1 = torch.tensor([[1, 2], [3, 4]]) - data2 = torch.tensor([[1.0, 2.0]]) - data3 = torch.tensor(7) + # if dist.get_rank() == 0 + data1 = torch.tensor([[1, 2], [3, 4]]) + data2 = torch.tensor([[1.0, 2.0]]) + data3 = torch.tensor(7) if dist.get_rank() == 1: data1 = torch.tensor([[5, 6]]) diff --git a/tests/test_handler_metrics_saver_dist.py b/tests/test_handler_metrics_saver_dist.py index 46c9ad27d7..2e12b08aa9 100644 --- a/tests/test_handler_metrics_saver_dist.py +++ b/tests/test_handler_metrics_saver_dist.py @@ -51,8 +51,10 @@ def _val_func(engine, batch): engine = Engine(_val_func) + # define here to ensure symbol always exists regardless of the following if conditions + data = [{PostFix.meta("image"): {"filename_or_obj": [fnames[0]]}}] + if 
my_rank == 0: - data = [{PostFix.meta("image"): {"filename_or_obj": [fnames[0]]}}] @engine.on(Events.EPOCH_COMPLETED) def _save_metrics0(engine): diff --git a/tests/test_hilbert_transform.py b/tests/test_hilbert_transform.py index 879a74969d..b91ba3f6b7 100644 --- a/tests/test_hilbert_transform.py +++ b/tests/test_hilbert_transform.py @@ -19,11 +19,11 @@ from monai.networks.layers import HilbertTransform from monai.utils import OptionalImportError -from tests.utils import SkipIfModule, SkipIfNoModule, skip_if_no_cuda +from tests.utils import SkipIfModule, SkipIfNoModule def create_expected_numpy_output(input_datum, **kwargs): - x = np.fft.fft(input_datum.cpu().numpy() if input_datum.device.type == "cuda" else input_datum.numpy(), **kwargs) + x = np.fft.fft(input_datum.cpu().numpy(), **kwargs) f = np.fft.fftfreq(x.shape[kwargs["axis"]]) u = np.heaviside(f, 0.5) new_dims_before = kwargs["axis"] @@ -44,19 +44,15 @@ def create_expected_numpy_output(input_datum, **kwargs): # CPU TEST DATA cpu_input_data = {} -cpu_input_data["1D"] = torch.as_tensor(hann_windowed_sine, device=cpu).unsqueeze(0).unsqueeze(0) -cpu_input_data["2D"] = ( - torch.as_tensor(np.stack([hann_windowed_sine] * 10, axis=1), device=cpu).unsqueeze(0).unsqueeze(0) -) -cpu_input_data["3D"] = ( - torch.as_tensor(np.stack([np.stack([hann_windowed_sine] * 10, axis=1)] * 10, axis=2), device=cpu) - .unsqueeze(0) - .unsqueeze(0) -) -cpu_input_data["1D 2CH"] = torch.as_tensor(np.stack([hann_windowed_sine] * 10, axis=1), device=cpu).unsqueeze(0) +cpu_input_data["1D"] = torch.as_tensor(hann_windowed_sine, device=cpu)[None, None] +cpu_input_data["2D"] = torch.as_tensor(np.stack([hann_windowed_sine] * 10, axis=1), device=cpu)[None, None] +cpu_input_data["3D"] = torch.as_tensor( + np.stack([np.stack([hann_windowed_sine] * 10, axis=1)] * 10, axis=2), device=cpu +)[None, None] +cpu_input_data["1D 2CH"] = torch.as_tensor(np.stack([hann_windowed_sine] * 10, axis=1), device=cpu)[None] cpu_input_data["2D 2CH"] = torch.as_tensor( np.stack([np.stack([hann_windowed_sine] * 10, axis=1)] * 10, axis=2), device=cpu -).unsqueeze(0) +)[None] # SINGLE-CHANNEL CPU VALUE TESTS @@ -97,64 +93,21 @@ def create_expected_numpy_output(input_datum, **kwargs): 1e-5, # absolute tolerance ] +TEST_CASES_CPU = [ + TEST_CASE_1D_SINE_CPU, + TEST_CASE_2D_SINE_CPU, + TEST_CASE_3D_SINE_CPU, + TEST_CASE_1D_2CH_SINE_CPU, + TEST_CASE_2D_2CH_SINE_CPU, +] + # GPU TEST DATA if torch.cuda.is_available(): gpu = torch.device("cuda") - - gpu_input_data = {} - gpu_input_data["1D"] = torch.as_tensor(hann_windowed_sine, device=gpu).unsqueeze(0).unsqueeze(0) - gpu_input_data["2D"] = ( - torch.as_tensor(np.stack([hann_windowed_sine] * 10, axis=1), device=gpu).unsqueeze(0).unsqueeze(0) - ) - gpu_input_data["3D"] = ( - torch.as_tensor(np.stack([np.stack([hann_windowed_sine] * 10, axis=1)] * 10, axis=2), device=gpu) - .unsqueeze(0) - .unsqueeze(0) - ) - gpu_input_data["1D 2CH"] = torch.as_tensor(np.stack([hann_windowed_sine] * 10, axis=1), device=gpu).unsqueeze(0) - gpu_input_data["2D 2CH"] = torch.as_tensor( - np.stack([np.stack([hann_windowed_sine] * 10, axis=1)] * 10, axis=2), device=gpu - ).unsqueeze(0) - - # SINGLE CHANNEL GPU VALUE TESTS - - TEST_CASE_1D_SINE_GPU = [ - {}, # args (empty, so use default) - gpu_input_data["1D"], # Input data: Random 1D signal - create_expected_numpy_output(gpu_input_data["1D"], axis=2), # Expected output: FFT of signal - 1e-5, # absolute tolerance - ] - - TEST_CASE_2D_SINE_GPU = [ - {}, # args (empty, so use default) - gpu_input_data["2D"], # Input data: 
Random 1D signal - create_expected_numpy_output(gpu_input_data["2D"], axis=2), # Expected output: FFT of signal - 1e-5, # absolute tolerance - ] - - TEST_CASE_3D_SINE_GPU = [ - {}, # args (empty, so use default) - gpu_input_data["3D"], # Input data: Random 1D signal - create_expected_numpy_output(gpu_input_data["3D"], axis=2), # Expected output: FFT of signal - 1e-5, # absolute tolerance - ] - - # MULTICHANNEL GPU VALUE TESTS, PROCESS ALONG FIRST SPATIAL AXIS - - TEST_CASE_1D_2CH_SINE_GPU = [ - {}, # args (empty, so use default) - gpu_input_data["1D 2CH"], # Input data: Random 1D signal - create_expected_numpy_output(gpu_input_data["1D 2CH"], axis=2), - 1e-5, # absolute tolerance - ] - - TEST_CASE_2D_2CH_SINE_GPU = [ - {}, # args (empty, so use default) - gpu_input_data["2D 2CH"], # Input data: Random 1D signal - create_expected_numpy_output(gpu_input_data["2D 2CH"], axis=2), - 1e-5, # absolute tolerance - ] + TEST_CASES_GPU = [[args, image.to(gpu), exp_data, atol] for args, image, exp_data, atol in TEST_CASES_CPU] +else: + TEST_CASES_GPU = [] # TESTS CHECKING PADDING, AXIS SELECTION ETC ARE COVERED BY test_detect_envelope.py @@ -162,42 +115,10 @@ def create_expected_numpy_output(input_datum, **kwargs): @SkipIfNoModule("torch.fft") class TestHilbertTransformCPU(unittest.TestCase): - @parameterized.expand( - [ - TEST_CASE_1D_SINE_CPU, - TEST_CASE_2D_SINE_CPU, - TEST_CASE_3D_SINE_CPU, - TEST_CASE_1D_2CH_SINE_CPU, - TEST_CASE_2D_2CH_SINE_CPU, - ] - ) - def test_value(self, arguments, image, expected_data, atol): - result = HilbertTransform(**arguments)(image) - result = result.squeeze(0).squeeze(0).numpy() - np.testing.assert_allclose(result, expected_data.squeeze(), atol=atol) - - -@skip_if_no_cuda -@SkipIfNoModule("torch.fft") -class TestHilbertTransformGPU(unittest.TestCase): - - @parameterized.expand( - ( - [] - if not torch.cuda.is_available() - else [ - TEST_CASE_1D_SINE_GPU, - TEST_CASE_2D_SINE_GPU, - TEST_CASE_3D_SINE_GPU, - TEST_CASE_1D_2CH_SINE_GPU, - TEST_CASE_2D_2CH_SINE_GPU, - ] - ), - skip_on_empty=True, - ) + @parameterized.expand(TEST_CASES_CPU + TEST_CASES_GPU) def test_value(self, arguments, image, expected_data, atol): result = HilbertTransform(**arguments)(image) - result = result.squeeze(0).squeeze(0).cpu().numpy() + result = np.squeeze(result.cpu().numpy()) np.testing.assert_allclose(result, expected_data.squeeze(), atol=atol) diff --git a/tests/test_integration_unet_2d.py b/tests/test_integration_unet_2d.py index 918190775c..3b40682de0 100644 --- a/tests/test_integration_unet_2d.py +++ b/tests/test_integration_unet_2d.py @@ -35,6 +35,7 @@ def __getitem__(self, _unused_id): def __len__(self): return train_steps + net = None if net_name == "basicunet": net = BasicUNet(spatial_dims=2, in_channels=1, out_channels=1, features=(4, 8, 8, 16, 16, 32)) elif net_name == "unet": diff --git a/tests/test_latent_diffusion_inferer.py b/tests/test_latent_diffusion_inferer.py index 065ebafd95..2e04ad6c5c 100644 --- a/tests/test_latent_diffusion_inferer.py +++ b/tests/test_latent_diffusion_inferer.py @@ -320,6 +320,8 @@ class TestDiffusionSamplingInferer(unittest.TestCase): def test_prediction_shape( self, ae_model_type, autoencoder_params, dm_model_type, stage_2_params, input_shape, latent_shape ): + stage_1 = None + if ae_model_type == "AutoencoderKL": stage_1 = AutoencoderKL(**autoencoder_params) if ae_model_type == "VQVAE": @@ -368,6 +370,8 @@ def test_prediction_shape( def test_sample_shape( self, ae_model_type, autoencoder_params, dm_model_type, stage_2_params, input_shape, 
latent_shape ): + stage_1 = None + if ae_model_type == "AutoencoderKL": stage_1 = AutoencoderKL(**autoencoder_params) if ae_model_type == "VQVAE": @@ -413,6 +417,8 @@ def test_sample_shape( def test_sample_intermediates( self, ae_model_type, autoencoder_params, dm_model_type, stage_2_params, input_shape, latent_shape ): + stage_1 = None + if ae_model_type == "AutoencoderKL": stage_1 = AutoencoderKL(**autoencoder_params) if ae_model_type == "VQVAE": @@ -468,6 +474,8 @@ def test_sample_intermediates( def test_get_likelihoods( self, ae_model_type, autoencoder_params, dm_model_type, stage_2_params, input_shape, latent_shape ): + stage_1 = None + if ae_model_type == "AutoencoderKL": stage_1 = AutoencoderKL(**autoencoder_params) if ae_model_type == "VQVAE": @@ -521,6 +529,8 @@ def test_get_likelihoods( def test_resample_likelihoods( self, ae_model_type, autoencoder_params, dm_model_type, stage_2_params, input_shape, latent_shape ): + stage_1 = None + if ae_model_type == "AutoencoderKL": stage_1 = AutoencoderKL(**autoencoder_params) if ae_model_type == "VQVAE": @@ -576,6 +586,8 @@ def test_resample_likelihoods( def test_prediction_shape_conditioned_concat( self, ae_model_type, autoencoder_params, dm_model_type, stage_2_params, input_shape, latent_shape ): + stage_1 = None + if ae_model_type == "AutoencoderKL": stage_1 = AutoencoderKL(**autoencoder_params) if ae_model_type == "VQVAE": @@ -642,6 +654,8 @@ def test_prediction_shape_conditioned_concat( def test_sample_shape_conditioned_concat( self, ae_model_type, autoencoder_params, dm_model_type, stage_2_params, input_shape, latent_shape ): + stage_1 = None + if ae_model_type == "AutoencoderKL": stage_1 = AutoencoderKL(**autoencoder_params) if ae_model_type == "VQVAE": @@ -703,6 +717,8 @@ def test_sample_shape_conditioned_concat( def test_sample_shape_different_latents( self, ae_model_type, autoencoder_params, dm_model_type, stage_2_params, input_shape, latent_shape ): + stage_1 = None + if ae_model_type == "AutoencoderKL": stage_1 = AutoencoderKL(**autoencoder_params) if ae_model_type == "VQVAE": diff --git a/tests/test_map_and_generate_sampling_centers.py b/tests/test_map_and_generate_sampling_centers.py new file mode 100644 index 0000000000..ff74f974b9 --- /dev/null +++ b/tests/test_map_and_generate_sampling_centers.py @@ -0,0 +1,87 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
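
The new test module that follows exercises map_and_generate_sampling_centers across array backends. The pattern it relies on: call set_determinism immediately before each invocation so every backend draws the same random centers, then compare the results element-wise. A condensed sketch of that usage, reusing the keyword arguments from TEST_CASE_1 below; coordinate entries are assumed to be plain scalars:

import numpy as np
import torch

from monai.transforms import map_and_generate_sampling_centers
from monai.utils.misc import set_determinism

label = np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]])

centers = []
for convert in (np.asarray, torch.as_tensor):  # numpy backend, then torch backend
    set_determinism(0)  # identical RNG state before each call
    centers.append(
        map_and_generate_sampling_centers(
            label=convert(label), spatial_size=[2, 2, 2], num_samples=2,
            label_spatial_shape=[3, 3, 3], num_classes=3, image=None,
            ratios=[0, 1, 2], image_threshold=0.0,
        )
    )

for c_np, c_t in zip(centers[0], centers[1]):  # pair up corresponding centers
    for x, y in zip(c_np, c_t):
        assert float(x) == float(y)
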
+ +from __future__ import annotations + +import unittest +from copy import deepcopy + +import numpy as np +from parameterized import parameterized + +from monai.transforms import map_and_generate_sampling_centers +from monai.utils.misc import set_determinism +from tests.utils import TEST_NDARRAYS, assert_allclose + +TEST_CASE_1 = [ + # test Argmax data + { + "label": (np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]])), + "spatial_size": [2, 2, 2], + "num_samples": 2, + "label_spatial_shape": [3, 3, 3], + "num_classes": 3, + "image": None, + "ratios": [0, 1, 2], + "image_threshold": 0.0, + }, + tuple, + 2, + 3, +] + +TEST_CASE_2 = [ + { + "label": ( + np.array( + [ + [[1, 0, 0], [0, 1, 0], [0, 0, 1]], + [[0, 1, 0], [0, 0, 1], [1, 0, 0]], + [[0, 0, 1], [1, 0, 0], [0, 1, 0]], + ] + ) + ), + "spatial_size": [2, 2, 2], + "num_samples": 1, + "ratios": None, + "label_spatial_shape": [3, 3, 3], + "image": None, + "image_threshold": 0.0, + }, + tuple, + 1, + 3, +] + + +class TestMapAndGenerateSamplingCenters(unittest.TestCase): + + @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) + def test_map_and_generate_sampling_centers(self, input_data, expected_type, expected_count, expected_shape): + results = [] + for p in TEST_NDARRAYS + (None,): + input_data = deepcopy(input_data) + if p is not None: + input_data["label"] = p(input_data["label"]) + set_determinism(0) + result = map_and_generate_sampling_centers(**input_data) + self.assertIsInstance(result, expected_type) + self.assertEqual(len(result), expected_count) + self.assertEqual(len(result[0]), expected_shape) + # check for consistency between numpy, torch and torch.cuda + results.append(result) + if len(results) > 1: + for x, y in zip(results[0], results[-1]): + assert_allclose(x, y, type_test=False) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_pad_collation.py b/tests/test_pad_collation.py index 17f49611df..9d5012c9a3 100644 --- a/tests/test_pad_collation.py +++ b/tests/test_pad_collation.py @@ -89,7 +89,7 @@ def tearDown(self) -> None: @parameterized.expand(TESTS) def test_pad_collation(self, t_type, collate_method, transform): - if t_type == dict: + if t_type is dict: dataset = CacheDataset(self.dict_data, transform, progress=False) else: dataset = _Dataset(self.list_data, self.list_labels, transform) @@ -104,7 +104,7 @@ def test_pad_collation(self, t_type, collate_method, transform): loader = DataLoader(dataset, batch_size=10, collate_fn=collate_method) # check collation in forward direction for data in loader: - if t_type == dict: + if t_type is dict: shapes = [] decollated_data = decollate_batch(data) for d in decollated_data: @@ -113,7 +113,7 @@ def test_pad_collation(self, t_type, collate_method, transform): self.assertTrue(len(output["image"].applied_operations), len(dataset.transform.transforms)) self.assertTrue(len(set(shapes)) > 1) # inverted shapes must be different because of random xforms - if t_type == dict: + if t_type is dict: batch_inverse = BatchInverseTransform(dataset.transform, loader) for data in loader: output = batch_inverse(data) diff --git a/tests/test_profiling.py b/tests/test_profiling.py index 6bee7ba262..649d980ebf 100644 --- a/tests/test_profiling.py +++ b/tests/test_profiling.py @@ -35,6 +35,7 @@ def setUp(self): self.scale = mt.ScaleIntensity() self.scale_call_name = "ScaleIntensity.__call__" + self.compose_call_name = "Compose.__call__" self.test_comp = mt.Compose([mt.ScaleIntensity(), mt.RandAxisFlip(0.5)]) self.test_image = torch.rand(1, 16, 16, 16) self.pid = os.getpid() @@ -82,7 
+83,7 @@ def test_profile_multithread(self): self.assertSequenceEqual(batch.shape, (4, 1, 16, 16, 16)) results = wp.get_results() - self.assertSequenceEqual(list(results), [self.scale_call_name]) + self.assertSequenceEqual(list(results), [self.scale_call_name, self.compose_call_name]) prs = results[self.scale_call_name] @@ -98,6 +99,7 @@ def test_profile_context(self): self.scale(self.test_image) results = wp.get_results() + self.assertSequenceEqual(set(results), {"ScaleIntensity.__call__", "context"}) prs = results["context"] diff --git a/tests/test_reg_loss_integration.py b/tests/test_reg_loss_integration.py index 1fb81689e6..8afc2da6ad 100644 --- a/tests/test_reg_loss_integration.py +++ b/tests/test_reg_loss_integration.py @@ -83,6 +83,9 @@ def forward(self, x): # initialize a SGD optimizer optimizer = optim.Adam(net.parameters(), lr=learning_rate) + # declare first for pylint + init_loss = None + # train the network for it in range(max_iter): # set the gradient to zero diff --git a/tests/test_regularization.py b/tests/test_regularization.py index 4df60b9808..12d64637d5 100644 --- a/tests/test_regularization.py +++ b/tests/test_regularization.py @@ -13,28 +13,31 @@ import unittest +import numpy as np import torch -from monai.transforms import CutMix, CutMixd, CutOut, MixUp, MixUpd -from monai.utils import set_determinism +from monai.transforms import CutMix, CutMixd, CutOut, CutOutd, MixUp, MixUpd +from tests.utils import assert_allclose class TestMixup(unittest.TestCase): - def setUp(self) -> None: - set_determinism(seed=0) - - def tearDown(self) -> None: - set_determinism(None) - def test_mixup(self): for dims in [2, 3]: shape = (6, 3) + (32,) * dims sample = torch.rand(*shape, dtype=torch.float32) mixup = MixUp(6, 1.0) + mixup.set_random_state(seed=0) output = mixup(sample) + np.random.seed(0) + # simulate the randomize() of transform + np.random.random() + weight = torch.from_numpy(np.random.beta(1.0, 1.0, 6)).type(torch.float32) + perm = np.random.permutation(6) self.assertEqual(output.shape, sample.shape) - self.assertTrue(any(not torch.allclose(sample, mixup(sample)) for _ in range(10))) + mixweight = weight[(Ellipsis,) + (None,) * (dims + 1)] + expected = mixweight * sample + (1 - mixweight) * sample[perm, ...] + assert_allclose(output, expected, type_test=False, atol=1e-7) with self.assertRaises(ValueError): MixUp(6, -0.5) @@ -52,8 +55,19 @@ def test_mixupd(self): t = torch.rand(*shape, dtype=torch.float32) sample = {"a": t, "b": t} mixup = MixUpd(["a", "b"], 6) + mixup.set_random_state(seed=0) output = mixup(sample) - self.assertTrue(torch.allclose(output["a"], output["b"])) + np.random.seed(0) + # simulate the randomize() of transform + np.random.random() + weight = torch.from_numpy(np.random.beta(1.0, 1.0, 6)).type(torch.float32) + perm = np.random.permutation(6) + self.assertEqual(output["a"].shape, sample["a"].shape) + mixweight = weight[(Ellipsis,) + (None,) * (dims + 1)] + expected = mixweight * sample["a"] + (1 - mixweight) * sample["a"][perm, ...] 
+ assert_allclose(output["a"], expected, type_test=False, atol=1e-7) + assert_allclose(output["a"], output["b"], type_test=False, atol=1e-7) + # self.assertTrue(torch.allclose(output["a"], output["b"])) with self.assertRaises(ValueError): MixUpd(["k1", "k2"], 6, -0.5) @@ -61,17 +75,12 @@ def test_mixupd(self): class TestCutMix(unittest.TestCase): - def setUp(self) -> None: - set_determinism(seed=0) - - def tearDown(self) -> None: - set_determinism(None) - def test_cutmix(self): for dims in [2, 3]: shape = (6, 3) + (32,) * dims sample = torch.rand(*shape, dtype=torch.float32) cutmix = CutMix(6, 1.0) + cutmix.set_random_state(seed=0) output = cutmix(sample) self.assertEqual(output.shape, sample.shape) self.assertTrue(any(not torch.allclose(sample, cutmix(sample)) for _ in range(10))) @@ -83,29 +92,50 @@ def test_cutmixd(self): label = torch.randint(0, 1, shape) sample = {"a": t, "b": t, "lbl1": label, "lbl2": label} cutmix = CutMixd(["a", "b"], 6, label_keys=("lbl1", "lbl2")) + cutmix.set_random_state(seed=123) output = cutmix(sample) - # croppings are different on each application - self.assertTrue(not torch.allclose(output["a"], output["b"])) # but mixing of labels is not affected by it self.assertTrue(torch.allclose(output["lbl1"], output["lbl2"])) class TestCutOut(unittest.TestCase): - def setUp(self) -> None: - set_determinism(seed=0) - - def tearDown(self) -> None: - set_determinism(None) - def test_cutout(self): for dims in [2, 3]: shape = (6, 3) + (32,) * dims sample = torch.rand(*shape, dtype=torch.float32) cutout = CutOut(6, 1.0) + cutout.set_random_state(seed=123) output = cutout(sample) + np.random.seed(123) + # simulate the randomize() of transform + np.random.random() + weight = torch.from_numpy(np.random.beta(1.0, 1.0, 6)).type(torch.float32) + perm = np.random.permutation(6) + coords = [torch.from_numpy(np.random.randint(0, d, size=(1,))) for d in sample.shape[2:]] + assert_allclose(weight, cutout._params[0]) + assert_allclose(perm, cutout._params[1]) + self.assertSequenceEqual(coords, cutout._params[2]) self.assertEqual(output.shape, sample.shape) - self.assertTrue(any(not torch.allclose(sample, cutout(sample)) for _ in range(10))) + + def test_cutoutd(self): + for dims in [2, 3]: + shape = (6, 3) + (32,) * dims + t = torch.rand(*shape, dtype=torch.float32) + sample = {"a": t, "b": t} + cutout = CutOutd(["a", "b"], 6, 1.0) + cutout.set_random_state(seed=123) + output = cutout(sample) + np.random.seed(123) + # simulate the randomize() of transform + np.random.random() + weight = torch.from_numpy(np.random.beta(1.0, 1.0, 6)).type(torch.float32) + perm = np.random.permutation(6) + coords = [torch.from_numpy(np.random.randint(0, d, size=(1,))) for d in t.shape[2:]] + assert_allclose(weight, cutout.cutout._params[0]) + assert_allclose(perm, cutout.cutout._params[1]) + self.assertSequenceEqual(coords, cutout.cutout._params[2]) + self.assertEqual(output["a"].shape, sample["a"].shape) if __name__ == "__main__": diff --git a/tests/test_resnet.py b/tests/test_resnet.py index 5d34a32d8d..e873f1238a 100644 --- a/tests/test_resnet.py +++ b/tests/test_resnet.py @@ -107,6 +107,7 @@ "num_classes": 3, "conv1_t_size": [3], "conv1_t_stride": 1, + "act": ("relu", {"inplace": False}), }, (1, 2, 32), (1, 3), @@ -185,13 +186,46 @@ (1, 3), ] +TEST_CASE_8 = [ + { + "block": "bottleneck", + "layers": [3, 4, 6, 3], + "block_inplanes": [64, 128, 256, 512], + "spatial_dims": 1, + "n_input_channels": 2, + "num_classes": 3, + "conv1_t_size": [3], + "conv1_t_stride": 1, + "act": ("relu", {"inplace": 
False}), + }, + (1, 2, 32), + (1, 3), +] + +TEST_CASE_9 = [ # Layer norm + { + "block": ResNetBlock, + "layers": [3, 4, 6, 3], + "block_inplanes": [64, 128, 256, 512], + "spatial_dims": 1, + "n_input_channels": 2, + "num_classes": 3, + "conv1_t_size": [3], + "conv1_t_stride": 1, + "act": ("relu", {"inplace": False}), + "norm": ("layer", {"normalized_shape": (64, 32)}), + }, + (1, 2, 32), + (1, 3), +] + TEST_CASES = [] PRETRAINED_TEST_CASES = [] for case in [TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_2_A, TEST_CASE_3_A]: for model in [resnet10, resnet18, resnet34, resnet50, resnet101, resnet152, resnet200]: TEST_CASES.append([model, *case]) PRETRAINED_TEST_CASES.append([model, *case]) -for case in [TEST_CASE_5, TEST_CASE_5_A, TEST_CASE_6, TEST_CASE_7]: +for case in [TEST_CASE_5, TEST_CASE_5_A, TEST_CASE_6, TEST_CASE_7, TEST_CASE_8, TEST_CASE_9]: TEST_CASES.append([ResNet, *case]) TEST_SCRIPT_CASES = [ @@ -207,15 +241,6 @@ ] -CASE_EXTRACT_FEATURES = [ - ( - {"model_name": "resnet10", "pretrained": True, "spatial_dims": 3, "in_channels": 1}, - [1, 1, 64, 64, 64], - ([1, 64, 32, 32, 32], [1, 64, 16, 16, 16], [1, 128, 8, 8, 8], [1, 256, 4, 4, 4], [1, 512, 2, 2, 2]), - ) -] - - class TestResNet(unittest.TestCase): def setUp(self): diff --git a/tests/test_synthetic.py b/tests/test_synthetic.py index 7db3c3e77a..4ab2144568 100644 --- a/tests/test_synthetic.py +++ b/tests/test_synthetic.py @@ -47,7 +47,7 @@ def test_create_test_image(self, dim, input_param, expected_img, expected_seg, e set_determinism(seed=0) if dim == 2: img, seg = create_test_image_2d(**input_param) - elif dim == 3: + else: # dim == 3 img, seg = create_test_image_3d(**input_param) self.assertEqual(img.shape, expected_shape) self.assertEqual(seg.max(), expected_max_cls) diff --git a/tests/test_vis_cam.py b/tests/test_vis_cam.py index b641599af2..68b12de2f8 100644 --- a/tests/test_vis_cam.py +++ b/tests/test_vis_cam.py @@ -70,6 +70,8 @@ class TestClassActivationMap(unittest.TestCase): @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]) def test_shape(self, input_data, expected_shape): + model = None + if input_data["model"] == "densenet2d": model = DenseNet121(spatial_dims=2, in_channels=1, out_channels=3) if input_data["model"] == "densenet3d": @@ -80,6 +82,7 @@ def test_shape(self, input_data, expected_shape): model = SEResNet50(spatial_dims=2, in_channels=3, num_classes=4) if input_data["model"] == "senet3d": model = SEResNet50(spatial_dims=3, in_channels=3, num_classes=4) + device = "cuda:0" if torch.cuda.is_available() else "cpu" model.to(device) model.eval() diff --git a/tests/test_vis_gradcam.py b/tests/test_vis_gradcam.py index 325b74b3ce..f77d916a5b 100644 --- a/tests/test_vis_gradcam.py +++ b/tests/test_vis_gradcam.py @@ -153,6 +153,8 @@ class TestGradientClassActivationMap(unittest.TestCase): @parameterized.expand(TESTS) def test_shape(self, cam_class, input_data, expected_shape): + model = None + if input_data["model"] == "densenet2d": model = DenseNet121(spatial_dims=2, in_channels=1, out_channels=3) elif input_data["model"] == "densenet2d_bin": diff --git a/tests/test_warp.py b/tests/test_warp.py index 55f40764c3..0e5f2466db 100644 --- a/tests/test_warp.py +++ b/tests/test_warp.py @@ -217,6 +217,7 @@ def itk_warp(img, ddf): # warp warp_filter.SetDisplacementField(displacement_field) warp_filter.SetInput(itk_img) + warp_filter.Update() warped_img = warp_filter.GetOutput() warped_img = np.asarray(warped_img) diff --git a/tests/utils.py b/tests/utils.py index ea73a3ed81..d1939e590b 100644 
--- a/tests/utils.py +++ b/tests/utils.py @@ -156,6 +156,7 @@ def skip_if_downloading_fails(): "limit", # HTTP Error 503: Egress is over the account limit "authenticate", "timed out", # urlopen error [Errno 110] Connection timed out + "HTTPError", # HTTPError: 429 Client Error: Too Many Requests for huggingface hub ) ): raise unittest.SkipTest(f"error while downloading: {rt_e}") from rt_e # incomplete download
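
The final hunk adds "HTTPError" to the substrings that skip_if_downloading_fails matches against, so Hugging Face Hub rate limiting (429 Too Many Requests) skips a test instead of failing it. For context, a minimal sketch of this kind of guard; an abridged illustration, not the exact tests.utils implementation:

import unittest
from contextlib import contextmanager

@contextmanager
def skip_if_downloading_fails_sketch():
    try:
        yield
    except RuntimeError as rt_e:
        keywords = ("download", "limit", "authenticate", "timed out", "HTTPError")
        if any(k in str(rt_e) for k in keywords):
            # network trouble: skip the test rather than fail it
            raise unittest.SkipTest(f"error while downloading: {rt_e}") from rt_e
        raise  # anything else still fails the test

Call sites wrap their download in the context manager, e.g. with skip_if_downloading_fails_sketch(): fetch_weights(...), where fetch_weights is whatever download helper the test uses.
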