diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 9894848a53..456fa10c41 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -9,8 +9,8 @@ jobs:
   integration-py3:
     container:
       image: nvcr.io/nvidia/pytorch:22.04-py3  # CUDA 11.6 py38
-      options: --gpus all  # shm-size 4g works fine
-    runs-on: [self-hosted, linux, x64, integration]
+      options: --gpus "device=0" --ipc host  # shm-size 4g works fine
+    runs-on: [self-hosted, linux, x64, command]
     steps:
     # checkout the pull request branch
     - uses: actions/checkout@v3
@@ -34,7 +34,7 @@ jobs:
       run: |
         which python
         python -m pip install --upgrade pip wheel
-        python -m pip install --upgrade torch torchvision
+        python -m pip install --upgrade torch torchvision torchaudio
         python -m pip install -r requirements-dev.txt
         rm -rf /github/home/.cache/torch/hub/mmars/
     - name: Run integration tests
@@ -43,14 +43,37 @@
         git config --global --add safe.directory /__w/MONAI/MONAI
         git clean -ffdx
         nvidia-smi
-        export CUDA_VISIBLE_DEVICES=$(python -m tests.utils | tail -n 1)
+        export CUDA_VISIBLE_DEVICES=$(python -m tests.utils -c 1 | tail -n 1)
         echo $CUDA_VISIBLE_DEVICES
         trap 'if pgrep python; then pkill python; fi;' ERR
-        python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null &
+        python -c $'import torch\na=[torch.zeros(1,device=f"cuda:{i}") for i in range(torch.cuda.device_count())];\nwhile True:print(a)' > /dev/null &
         python -c "import torch; print(torch.__version__); print('{} of GPUs available'.format(torch.cuda.device_count()))"
         python -c 'import torch; print(torch.rand(5,3, device=torch.device("cuda:0")))'
+
+        # test auto3dseg
+        BUILD_MONAI=0 ./runtests.sh --build
+        python -m tests.test_auto3dseg_ensemble
+        python -m tests.test_auto3dseg_hpo
+        python -m tests.test_integration_autorunner
+        python -m tests.test_integration_gpu_customization
+
+        # test latest template
+        cd ../
+        git clone --depth 1 --branch main --single-branch https://github.com/Project-MONAI/research-contributions.git
+        ls research-contributions/
+        cp -r research-contributions/auto3dseg/algorithm_templates ../MONAI/
+        cd research-contributions && git log -1 && cd ..
+        export OMP_NUM_THREADS=4
+        export MKL_NUM_THREADS=4
+        export MONAI_TESTING_ALGO_TEMPLATE=algorithm_templates
+        python -m tests.test_auto3dseg_ensemble
+        python -m tests.test_auto3dseg_hpo
+        python -m tests.test_integration_autorunner
+        python -m tests.test_integration_gpu_customization
+
+        # the other tests
         BUILD_MONAI=1 ./runtests.sh --build --net
-        BUILD_MONAI=1 ./runtests.sh --build --unittests --disttests
+        BUILD_MONAI=1 ./runtests.sh --build --unittests
         if pgrep python; then pkill python; fi
       shell: bash
     - name: Add reaction
diff --git a/monai/apps/auto3dseg/auto_runner.py b/monai/apps/auto3dseg/auto_runner.py
index fcd76e02a0..57d3b6c014 100644
--- a/monai/apps/auto3dseg/auto_runner.py
+++ b/monai/apps/auto3dseg/auto_runner.py
@@ -281,7 +281,7 @@ def __init__(
         # determine if we need to analyze, algo_gen or train from cache, unless manually provided
         self.analyze = not self.cache["analyze"] if analyze is None else analyze
         self.algo_gen = not self.cache["algo_gen"] if algo_gen is None else algo_gen
-        self.train = train
+        self.train = not self.cache["train"] if train is None else train
         self.ensemble = ensemble  # last step, no need to check

         self.set_training_params()
@@ -758,8 +758,7 @@ def run(self):
             logger.info("Skipping algorithm generation...")

         # step 3: algo training
-        auto_train_choice = self.train is None
-        if self.train or (auto_train_choice and not self.cache["train"]):
+        if self.train:
             history = import_bundle_algo_history(self.work_dir, only_trained=False)

             if len(history) == 0:
@@ -768,15 +767,10 @@
                     "Possibly the required algorithms generation step was not completed."
                 )

-            if auto_train_choice:
-                history = [h for h in history if not h["is_trained"]]  # skip trained
-
-            if len(history) > 0:
-                if not self.hpo:
-                    self._train_algo_in_sequence(history)
-                else:
-                    self._train_algo_in_nni(history)
-
+            if not self.hpo:
+                self._train_algo_in_sequence(history)
+            else:
+                self._train_algo_in_nni(history)
             self.export_cache(train=True)
         else:
             logger.info("Skipping algorithm training...")
@@ -804,4 +798,4 @@
             self.save_image(pred)
             logger.info(f"Auto3Dseg ensemble prediction outputs are saved in {self.output_dir}.")

-        logger.info("Auto3Dseg pipeline is completed successfully.")
+        logger.info("Auto3Dseg pipeline is complete.")
diff --git a/monai/apps/auto3dseg/bundle_gen.py b/monai/apps/auto3dseg/bundle_gen.py
index 8104a79d15..33a3afd07c 100644
--- a/monai/apps/auto3dseg/bundle_gen.py
+++ b/monai/apps/auto3dseg/bundle_gen.py
@@ -35,7 +35,7 @@
 from monai.utils import ensure_tuple

 logger = get_logger(module_name=__name__)
-ALGO_HASH = os.environ.get("MONAI_ALGO_HASH", "4af80e1")
+ALGO_HASH = os.environ.get("MONAI_ALGO_HASH", "7758ad1")

 __all__ = ["BundleAlgo", "BundleGen"]

diff --git a/monai/apps/auto3dseg/utils.py b/monai/apps/auto3dseg/utils.py
index feadc08808..67cde64a2c 100644
--- a/monai/apps/auto3dseg/utils.py
+++ b/monai/apps/auto3dseg/utils.py
@@ -47,14 +47,11 @@ def import_bundle_algo_history(

         if isinstance(algo, BundleAlgo):  # algo's template path needs override
             algo.template_path = algo_meta_data["template_path"]

-        best_metrics = "best_metrics"
-        is_trained = best_metrics in algo_meta_data
-
         if only_trained:
-            if is_trained:
-                history.append({name: algo, "is_trained": is_trained, best_metrics: algo_meta_data[best_metrics]})
+            if "best_metrics" in algo_meta_data:
+                history.append({name: algo})
         else:
-            history.append({name: algo, "is_trained": is_trained, best_metrics: algo_meta_data.get(best_metrics, None)})
+            history.append({name: algo})

     return history
diff --git a/monai/data/utils.py b/monai/data/utils.py
index 2c035afb3f..d5dddb5d55 100644
--- a/monai/data/utils.py
+++ b/monai/data/utils.py
@@ -17,7 +17,6 @@
 import math
 import os
 import pickle
-import warnings
 from collections import abc, defaultdict
 from collections.abc import Generator, Iterable, Mapping, Sequence, Sized
 from copy import deepcopy
@@ -786,7 +785,6 @@ def rectify_header_sform_qform(img_nii):
         return img_nii

     norm = affine_to_spacing(img_nii.affine, r=d)
-    warnings.warn(f"Modifying image pixdim from {pixdim} to {norm}")
     img_nii.header.set_zooms(norm)
     return img_nii

diff --git a/tests/test_retinanet.py b/tests/test_retinanet.py
index b43ad49dfd..f143550b91 100644
--- a/tests/test_retinanet.py
+++ b/tests/test_retinanet.py
@@ -97,7 +97,7 @@
         TEST_CASES_TS.append([model, *case])


-@SkipIfBeforePyTorchVersion((1, 9))
+@SkipIfBeforePyTorchVersion((1, 12))
 @unittest.skipUnless(has_torchvision, "Requires torchvision")
 class TestRetinaNet(unittest.TestCase):
     @parameterized.expand(TEST_CASES)
diff --git a/tests/utils.py b/tests/utils.py
index d195bd2ac5..99ab876244 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -11,6 +11,7 @@

 from __future__ import annotations

+import argparse
 import copy
 import datetime
 import functools
@@ -784,6 +785,7 @@ def query_memory(n=2):
     bash_string = "nvidia-smi --query-gpu=power.draw,temperature.gpu,memory.used --format=csv,noheader,nounits"

     try:
+        print(f"query memory with n={n}")
         p1 = Popen(bash_string.split(), stdout=PIPE)
         output, error = p1.communicate()
         free_memory = [x.split(",") for x in output.decode("utf-8").split("\n")[:-1]]
@@ -842,5 +844,8 @@ def command_line_tests(cmd, copy_env=True):
     TEST_DEVICES.append([torch.device("cuda")])

 if __name__ == "__main__":
-    print("\n", query_memory(), sep="\n")  # print to stdout
+    parser = argparse.ArgumentParser(prog="util")
+    parser.add_argument("-c", "--count", default=2, help="max number of gpus")
+    args = parser.parse_args()
+    print("\n", query_memory(int(args.count)), sep="\n")  # print to stdout
     sys.exit(0)
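For reference, the `auto_runner.py` hunks above restore a tri-state flag: an explicit `train=True`/`train=False` from the caller always wins, while `train=None` falls back to "run the step unless the cache says it already ran", which is why `run()` can simplify to a plain `if self.train:`. Below is a minimal standalone sketch of that pattern; the function and variable names are hypothetical, not MONAI's API.

```python
# Sketch of the tri-state override pattern restored in auto_runner.py.
# An explicit boolean from the caller always wins; None means "run the
# step unless the cache records it as already completed". Names here
# are illustrative, not MONAI's API.
from __future__ import annotations


def resolve_step(explicit: bool | None, already_cached: bool) -> bool:
    """Decide whether a pipeline step should run."""
    return (not already_cached) if explicit is None else explicit


if __name__ == "__main__":
    cache = {"analyze": True, "algo_gen": False, "train": False}
    print(resolve_step(None, cache["analyze"]))  # False: cached, so skipped
    print(resolve_step(None, cache["train"]))    # True: not cached, so runs
    print(resolve_step(False, cache["train"]))   # False: explicit override wins
```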