From 1352f87e4c707a43b563f76b67118ca7bd4a6f06 Mon Sep 17 00:00:00 2001
From: Hu Mingzhi
Date: Mon, 8 Aug 2022 00:18:50 +0000
Subject: [PATCH 01/32] Update

---
 .../pytorch/strategies/ipex/ipex_strategy.py | 38 +++++++++++++++++
 .../test/pytorch/tests/test_trainer_ipex.py  | 41 +++++++++++++++++++
 2 files changed, 79 insertions(+)

diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py
index 26d23855c44..ec0f80dee03 100644
--- a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py
+++ b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py
@@ -14,13 +14,22 @@
 # limitations under the License.
 #
 
+from contextlib import contextmanager
+from functools import partial
+from typing import Any, Union, Callable
+
 import torch
+from torch.nn import Module
+from torch.optim import Optimizer
+
 import pytorch_lightning as pl
 from pytorch_lightning.strategies import SingleDeviceStrategy
 from pytorch_lightning.accelerators.accelerator import Accelerator
 from pytorch_lightning.plugins.precision import PrecisionPlugin
+
 from bigdl.nano.utils.log4Error import invalidInputError
 import intel_extension_for_pytorch as ipex
+from intel_extension_for_pytorch.optim._optimizer_utils import IPEX_FUSED_OPTIMIZER_LIST
 
 from .ipex_accelerator import IPEXAccelerator
 
@@ -44,6 +53,9 @@ def __init__(
         """
         self.enable_bf16 = enable_bf16
 
+        if enable_bf16 and isinstance(precision_plugin, PrecisionPlugin):
+            precision_plugin = IPEXBF16Precision()
+
         super().__init__(accelerator=accelerator, precision_plugin=precision_plugin)
 
     def setup(self, trainer: pl.Trainer) -> None:
@@ -63,3 +75,29 @@ def setup(self, trainer: pl.Trainer) -> None:
             ipex.optimize(self.model, optimizer=self.optimizers[0], inplace=True, dtype=dtype)
         else:
             invalidInputError(False, "Ipex does not support more than one optimizers.")
+
+
+class IPEXBF16Precision(PrecisionPlugin):
+    """Create Precision Plugin for IPEX BFloat16."""
+
+    @contextmanager
+    def forward_context(self):
+        """PyTorch AMP for managing model forward/training_step/evaluation_step/predict_step."""
+        with torch.cpu.amp.autocast():
+            yield
+
+    def optimizer_step(self,
+                       model: Union["pl.LightningModule", Module],
+                       optimizer: Optimizer,
+                       optimizer_idx: int,
+                       closure: Callable[[], Any],
+                       **kwargs: Any) -> Any:
+        """Hook to run the optimizer step."""
+        if isinstance(model, pl.LightningModule):
+            closure = partial(self._wrap_closure, model, optimizer, optimizer_idx, closure)
+
+        # Automatically call closure for optimizer not supported by IPEX
+        if type(optimizer) not in IPEX_FUSED_OPTIMIZER_LIST:
+            closure()
+
+        return optimizer.step(closure, **kwargs)
diff --git a/python/nano/test/pytorch/tests/test_trainer_ipex.py b/python/nano/test/pytorch/tests/test_trainer_ipex.py
index 16847bcc35a..b5f24be9916 100644
--- a/python/nano/test/pytorch/tests/test_trainer_ipex.py
+++ b/python/nano/test/pytorch/tests/test_trainer_ipex.py
@@ -66,6 +66,47 @@ def test_trainer_save_checkpoint(self):
         pl_model = Trainer.compile(self.model, self.loss, self.optimizer, self.scheduler_dict)
         trainer.fit(pl_model, self.train_loader)
 
+    def test_trainer_ipex_bf16(self):
+        trainer = Trainer(max_epochs=max_epochs, use_ipex=True, enable_bf16=True)
+
+        # use_ipex=True will perform inplace optimization
+        model = ResNet18(10, pretrained=False, include_top=False, freeze=True)
+        optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
+        loss = nn.CrossEntropyLoss()
+        scheduler_dict = {
+            "scheduler": OneCycleLR(
+                optimizer,
+                0.1,
+                epochs=max_epochs,
+                steps_per_epoch=len(self.train_loader),
+            ),
+            "interval": "step",
+        }
+
+        pl_model = Trainer.compile(model, loss, optimizer, scheduler_dict)
+        trainer.fit(pl_model, self.train_loader)
+        trainer.test(pl_model, self.train_loader)
+
+    def test_trainer_ipex_bf16_unspport_optim(self):
+        trainer = Trainer(max_epochs=max_epochs, use_ipex=True, enable_bf16=True)
+
+        model = ResNet18(10, pretrained=False, include_top=False, freeze=True)
+        optimizer = torch.optim.AdamW(model.parameters(), lr=0.01, weight_decay=5e-4)
+        loss = nn.CrossEntropyLoss()
+        scheduler_dict = {
+            "scheduler": OneCycleLR(
+                optimizer,
+                0.1,
+                epochs=max_epochs,
+                steps_per_epoch=len(self.train_loader),
+            ),
+            "interval": "step",
+        }
+
+        pl_model = Trainer.compile(model, loss, optimizer, scheduler_dict)
+        trainer.fit(pl_model, self.train_loader)
+        trainer.test(pl_model, self.train_loader)
+
 
 if __name__ == '__main__':
     pytest.main([__file__])

From 478b1233455fe71d6cf6cb0b4eb2cd0946e7f911 Mon Sep 17 00:00:00 2001
From: Hu Mingzhi
Date: Mon, 8 Aug 2022 02:42:01 +0000
Subject: [PATCH 02/32] Fix code style

---
 .../src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py | 2 +-
 python/nano/test/pytorch/utils/_train_torch_lightning.py    | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py
index ec0f80dee03..e22c5bc6990 100644
--- a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py
+++ b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py
@@ -82,7 +82,7 @@ class IPEXBF16Precision(PrecisionPlugin):
 
     @contextmanager
     def forward_context(self):
-        """PyTorch AMP for managing model forward/training_step/evaluation_step/predict_step."""
+        """AMP for managing model forward/training_step/evaluation_step/predict_step."""
         with torch.cpu.amp.autocast():
             yield
 
diff --git a/python/nano/test/pytorch/utils/_train_torch_lightning.py b/python/nano/test/pytorch/utils/_train_torch_lightning.py
index 36b4a2908e6..a2ab126e6c8 100644
--- a/python/nano/test/pytorch/utils/_train_torch_lightning.py
+++ b/python/nano/test/pytorch/utils/_train_torch_lightning.py
@@ -122,5 +122,7 @@ def train_torch_lightning(model, batch_size, num_workers, data_dir, use_ipex=Fal
         else:
             # Frozen parameters should not change
             if not torch.all(torch.eq(para1, para2)):
-                raise Exception(name + " freeze failed.")
+                raise Exception(name + " freeze failed.\n"
+                                + para1 + "\n"
+                                + para2 + "\n")
     print("pass")

From f91c4d655c98a4fcd8dd592f8827ed9765262bbb Mon Sep 17 00:00:00 2001
From: Hu Mingzhi
Date: Mon, 8 Aug 2022 02:56:40 +0000
Subject: [PATCH 03/32] re-run action

---
 python/nano/test/pytorch/utils/_train_torch_lightning.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/python/nano/test/pytorch/utils/_train_torch_lightning.py b/python/nano/test/pytorch/utils/_train_torch_lightning.py
index a2ab126e6c8..859cc95c84d 100644
--- a/python/nano/test/pytorch/utils/_train_torch_lightning.py
+++ b/python/nano/test/pytorch/utils/_train_torch_lightning.py
@@ -122,7 +122,5 @@ def train_torch_lightning(model, batch_size, num_workers, data_dir, use_ipex=Fal
         else:
             # Frozen parameters should not change
             if not torch.all(torch.eq(para1, para2)):
-                raise Exception(name + " freeze failed.\n"
-                                + para1 + "\n"
-                                + para2 + "\n")
+                raise Exception(f"{name} freeze failed. \n {para1} \n {para2}")
     print("pass")

From f5af29efc38d9ad1f73ca0fb0feae8f368060537 Mon Sep 17 00:00:00 2001
From: Hu Mingzhi
Date: Mon, 8 Aug 2022 04:00:55 +0000
Subject: [PATCH 04/32] Fix code style

---
 python/nano/test/pytorch/tests/test_trainer_ipex.py      | 11 +++++++++++
 .../nano/test/pytorch/utils/_train_torch_lightning.py    |  2 +-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/python/nano/test/pytorch/tests/test_trainer_ipex.py b/python/nano/test/pytorch/tests/test_trainer_ipex.py
index b5f24be9916..2f8150242da 100644
--- a/python/nano/test/pytorch/tests/test_trainer_ipex.py
+++ b/python/nano/test/pytorch/tests/test_trainer_ipex.py
@@ -26,6 +26,7 @@
 
 from bigdl.nano.pytorch import Trainer
 from bigdl.nano.pytorch.vision.models import vision
+from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10
 
 batch_size = 256
 max_epochs = 2
@@ -86,6 +87,11 @@ def test_trainer_ipex_bf16(self):
         pl_model = Trainer.compile(model, loss, optimizer, scheduler_dict)
         trainer.fit(pl_model, self.train_loader)
         trainer.test(pl_model, self.train_loader)
+
+        if TORCH_VERSION_LESS_1_10:
+            import intel_pytorch_extension as ipex
+            # Avoid affecting other tests
+            ipex.enable_auto_mixed_precision(None)
 
     def test_trainer_ipex_bf16_unspport_optim(self):
         trainer = Trainer(max_epochs=max_epochs, use_ipex=True, enable_bf16=True)
@@ -107,6 +113,11 @@ def test_trainer_ipex_bf16_unspport_optim(self):
         trainer.fit(pl_model, self.train_loader)
         trainer.test(pl_model, self.train_loader)
 
+        if TORCH_VERSION_LESS_1_10:
+            import intel_pytorch_extension as ipex
+            # Avoid affecting other tests
+            ipex.enable_auto_mixed_precision(None)
+
 
 if __name__ == '__main__':
     pytest.main([__file__])
diff --git a/python/nano/test/pytorch/utils/_train_torch_lightning.py b/python/nano/test/pytorch/utils/_train_torch_lightning.py
index 859cc95c84d..36b4a2908e6 100644
--- a/python/nano/test/pytorch/utils/_train_torch_lightning.py
+++ b/python/nano/test/pytorch/utils/_train_torch_lightning.py
@@ -122,5 +122,5 @@ def train_torch_lightning(model, batch_size, num_workers, data_dir, use_ipex=Fal
         else:
            # Frozen parameters should not change
             if not torch.all(torch.eq(para1, para2)):
-                raise Exception(f"{name} freeze failed. 
\n {para1} \n {para2}") + raise Exception(name + " freeze failed.") print("pass") From d22b0948c4262db285dffa755486cb79d67c0b58 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Tue, 9 Aug 2022 01:48:27 +0000 Subject: [PATCH 05/32] re-run action --- .../src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py index e22c5bc6990..6bea7380552 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py @@ -83,7 +83,8 @@ class IPEXBF16Precision(PrecisionPlugin): @contextmanager def forward_context(self): """AMP for managing model forward/training_step/evaluation_step/predict_step.""" - with torch.cpu.amp.autocast(): + # Manually set the dtype + with torch.cpu.amp.autocast(dtype=torch.bfloat16): yield def optimizer_step(self, From b35b2745382863ab3c9f60ff0eec75a287ee45d1 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Wed, 10 Aug 2022 00:56:19 +0000 Subject: [PATCH 06/32] Update --- .../nano/pytorch/strategies/ipex/ipex_strategy.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py index 6bea7380552..51137188d3c 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py @@ -94,11 +94,14 @@ def optimizer_step(self, closure: Callable[[], Any], **kwargs: Any) -> Any: """Hook to run the optimizer step.""" + if type(optimizer) in IPEX_FUSED_OPTIMIZER_LIST: + return super().optimizer_step(model, optimizer, optimizer_idx, closure, **kwargs) + if isinstance(model, pl.LightningModule): closure = partial(self._wrap_closure, model, optimizer, optimizer_idx, closure) + + closure_result = closure() + optimizer.step(closure=None, **kwargs) + + return closure_result - # Automatically call closure for optimizer not supported by IPEX - if type(optimizer) not in IPEX_FUSED_OPTIMIZER_LIST: - closure() - - return optimizer.step(closure, **kwargs) From 5052561e85dc1c33d89f666922e15e78a4faf579 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Wed, 10 Aug 2022 01:06:52 +0000 Subject: [PATCH 07/32] Fix code style --- .../bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py index 51137188d3c..51c4591da76 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py @@ -96,12 +96,11 @@ def optimizer_step(self, """Hook to run the optimizer step.""" if type(optimizer) in IPEX_FUSED_OPTIMIZER_LIST: return super().optimizer_step(model, optimizer, optimizer_idx, closure, **kwargs) - + if isinstance(model, pl.LightningModule): closure = partial(self._wrap_closure, model, optimizer, optimizer_idx, closure) - + closure_result = closure() optimizer.step(closure=None, **kwargs) - - return closure_result + return closure_result From 5df5f0781488fa34d3036ff5632df0bdd8e9808a Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Wed, 10 Aug 2022 05:43:13 +0000 Subject: [PATCH 08/32] support bf16 
multi training --- .../src/bigdl/nano/pytorch/strategies/ddp_spawn.py | 5 +++++ python/nano/test/pytorch/tests/test_plugin_ipex.py | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py b/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py index 84e6d29f1d2..413fae8658b 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py @@ -61,6 +61,7 @@ ipex_optimize, create_IPEXAccelerator, to_cpu from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 from bigdl.nano.utils.log4Error import invalidInputError +from bigdl.nano.pytorch.strategies.ipex.ipex_strategy import IPEXBF16Precision import logging import warnings @@ -181,6 +182,10 @@ def __init__( super().__init__(accelerator=create_IPEXAccelerator(), parallel_devices=parallel_devices, cluster_environment=cluster_environment, **kwargs) + elif use_ipex and enable_bf16 and 'precision_plugin' not in kwargs: + super().__init__(parallel_devices=parallel_devices, + cluster_environment=cluster_environment, + precision_plugin=IPEXBF16Precision(), **kwargs) else: super().__init__(parallel_devices=parallel_devices, cluster_environment=cluster_environment, **kwargs) diff --git a/python/nano/test/pytorch/tests/test_plugin_ipex.py b/python/nano/test/pytorch/tests/test_plugin_ipex.py index 609e1e449ad..ec73288faa5 100644 --- a/python/nano/test/pytorch/tests/test_plugin_ipex.py +++ b/python/nano/test/pytorch/tests/test_plugin_ipex.py @@ -64,6 +64,16 @@ def test_trainer_subprocess_plugin(self): trainer.fit(pl_model, self.data_loader, self.test_data_loader) trainer.test(pl_model, self.test_data_loader) + def test_trainer_subprocess_plugin_bf16(self): + pl_model = LightningModule( + self.model, self.loss, self.optimizer, + metrics=[torchmetrics.F1(num_classes), torchmetrics.Accuracy(num_classes=10)] + ) + trainer = Trainer(num_processes=2, distributed_backend="subprocess", + max_epochs=4, use_ipex=True, enable_bf16=True, + callbacks=[CheckIPEXCallback()]) + trainer.fit(pl_model, self.data_loader, self.test_data_loader) + trainer.test(pl_model, self.test_data_loader) if __name__ == '__main__': pytest.main([__file__]) From 3748082f976f62141d084c1fc9447d85814cb62b Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Wed, 10 Aug 2022 05:56:07 +0000 Subject: [PATCH 09/32] Update --- python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py | 2 +- python/nano/test/pytorch/tests/test_plugin_ipex.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py b/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py index 413fae8658b..94e10ff30a3 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py @@ -61,7 +61,6 @@ ipex_optimize, create_IPEXAccelerator, to_cpu from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 from bigdl.nano.utils.log4Error import invalidInputError -from bigdl.nano.pytorch.strategies.ipex.ipex_strategy import IPEXBF16Precision import logging import warnings @@ -183,6 +182,7 @@ def __init__( parallel_devices=parallel_devices, cluster_environment=cluster_environment, **kwargs) elif use_ipex and enable_bf16 and 'precision_plugin' not in kwargs: + from bigdl.nano.pytorch.strategies.ipex.ipex_strategy import IPEXBF16Precision super().__init__(parallel_devices=parallel_devices, cluster_environment=cluster_environment, 
precision_plugin=IPEXBF16Precision(), **kwargs) diff --git a/python/nano/test/pytorch/tests/test_plugin_ipex.py b/python/nano/test/pytorch/tests/test_plugin_ipex.py index ec73288faa5..3c2fdd82732 100644 --- a/python/nano/test/pytorch/tests/test_plugin_ipex.py +++ b/python/nano/test/pytorch/tests/test_plugin_ipex.py @@ -75,5 +75,6 @@ def test_trainer_subprocess_plugin_bf16(self): trainer.fit(pl_model, self.data_loader, self.test_data_loader) trainer.test(pl_model, self.test_data_loader) + if __name__ == '__main__': pytest.main([__file__]) From 4073f20e78b0dd7604c24d7a9668d2c0aa2b3c8f Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Thu, 11 Aug 2022 14:53:19 +0000 Subject: [PATCH 10/32] Update --- .../pytorch/strategies/ipex/ipex_strategy.py | 26 +++++++++++++++---- .../src/bigdl/nano/pytorch/trainer/Trainer.py | 7 ++++- .../test/pytorch/tests/test_plugin_ipex.py | 11 +++++--- .../nano/test/pytorch/tests/test_trainer.py | 20 ++++++++++++-- .../test/pytorch/tests/test_trainer_ipex.py | 12 ++++++--- .../pytorch/utils/_train_ipex_callback.py | 14 ++++++++++ 6 files changed, 75 insertions(+), 15 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py index 51c4591da76..4e583d09b50 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py @@ -16,16 +16,18 @@ from contextlib import contextmanager from functools import partial +from logging import warning from typing import Any, Union, Callable import torch from torch.nn import Module -from torch.optim import Optimizer +from torch.optim import Optimizer, LBFGS +from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_12 import pytorch_lightning as pl from pytorch_lightning.strategies import SingleDeviceStrategy from pytorch_lightning.accelerators.accelerator import Accelerator -from pytorch_lightning.plugins.precision import PrecisionPlugin +from pytorch_lightning.plugins.precision import PrecisionPlugin, NativeMixedPrecisionPlugin from bigdl.nano.utils.log4Error import invalidInputError import intel_extension_for_pytorch as ipex @@ -55,7 +57,6 @@ def __init__( if enable_bf16 and isinstance(precision_plugin, PrecisionPlugin): precision_plugin = IPEXBF16Precision() - super().__init__(accelerator=accelerator, precision_plugin=precision_plugin) def setup(self, trainer: pl.Trainer) -> None: @@ -83,7 +84,10 @@ class IPEXBF16Precision(PrecisionPlugin): @contextmanager def forward_context(self): """AMP for managing model forward/training_step/evaluation_step/predict_step.""" - # Manually set the dtype + # Using IPEX bf16 and torch.autocast(...) will raise a segmentation fault + # in PyTorch 1.11. + # torch.autocast("cpu", args...) is equivalent to torch.cpu.amp.autocast(args...) + # in PyTorch 1.12. with torch.cpu.amp.autocast(dtype=torch.bfloat16): yield @@ -100,7 +104,19 @@ def optimizer_step(self, if isinstance(model, pl.LightningModule): closure = partial(self._wrap_closure, model, optimizer, optimizer_idx, closure) + # Only `torch.optim.LBFGS` need to reevaluate closure multiple times + # in optimizer.step(...) now. + if isinstance(optimizer, LBFGS): + invalidInputError(False, + "IPEX BFloat16 and the LBFGS optimizer are not compatible " + f"(optimizer {optimizer_idx}") + + # Detect custom optimzer + if type(optimizer).__name__ not in dir(torch.optim): + warning("Closure use in optimizer.step(...) 
is not currently supported" + " if IPEX and BFloat16 are enabled.") + closure_result = closure() - optimizer.step(closure=None, **kwargs) + optimizer.step(**kwargs) return closure_result diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index de7ba402026..ee6d3fd81a8 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -57,7 +57,6 @@ class Trainer(pl.Trainer): def __init__(self, num_processes: int = 1, use_ipex: bool = False, - enable_bf16=False, distributed_backend="subprocess", cpu_for_each_process: Optional[List[List[int]]] = None, use_hpo=False, @@ -111,6 +110,12 @@ def __init__(self, num_processes: int = 1, self.use_ipex = use_ipex + enable_bf16 = False + + if self.use_ipex and kwargs.get('precision', None) == "bf16": + # No need to set precision to 32, because Strategy > Accelerator/precision/plugins + enable_bf16 = True + if num_processes == 1: from bigdl.nano.pytorch.strategies import create_IPEXStrategy strategy = create_IPEXStrategy(enable_bf16=enable_bf16) if self.use_ipex else None diff --git a/python/nano/test/pytorch/tests/test_plugin_ipex.py b/python/nano/test/pytorch/tests/test_plugin_ipex.py index 3c2fdd82732..a1826c15d00 100644 --- a/python/nano/test/pytorch/tests/test_plugin_ipex.py +++ b/python/nano/test/pytorch/tests/test_plugin_ipex.py @@ -27,7 +27,7 @@ from test.pytorch.utils._train_torch_lightning import create_data_loader, data_transform from test.pytorch.utils._train_torch_lightning import create_test_data_loader -from test.pytorch.utils._train_ipex_callback import CheckIPEXCallback +from test.pytorch.utils._train_ipex_callback import CheckIPEXCallback, CheckIPEXFusedStepCallback from test.pytorch.tests.test_lightning import ResNet18 num_classes = 10 @@ -65,13 +65,16 @@ def test_trainer_subprocess_plugin(self): trainer.test(pl_model, self.test_data_loader) def test_trainer_subprocess_plugin_bf16(self): + model = ResNet18(pretrained=False, include_top=False, freeze=True) + loss = nn.CrossEntropyLoss() + optimizer = torch.optim.Adam(model.parameters(), lr=0.01) pl_model = LightningModule( - self.model, self.loss, self.optimizer, + model, loss, optimizer, metrics=[torchmetrics.F1(num_classes), torchmetrics.Accuracy(num_classes=10)] ) trainer = Trainer(num_processes=2, distributed_backend="subprocess", - max_epochs=4, use_ipex=True, enable_bf16=True, - callbacks=[CheckIPEXCallback()]) + max_epochs=4, use_ipex=True, precision="bf16", + callbacks=[CheckIPEXCallback(), CheckIPEXFusedStepCallback()]) trainer.fit(pl_model, self.data_loader, self.test_data_loader) trainer.test(pl_model, self.test_data_loader) diff --git a/python/nano/test/pytorch/tests/test_trainer.py b/python/nano/test/pytorch/tests/test_trainer.py index 614c44ef348..824714cbb3c 100644 --- a/python/nano/test/pytorch/tests/test_trainer.py +++ b/python/nano/test/pytorch/tests/test_trainer.py @@ -22,11 +22,13 @@ import pytest import torch +from torch.utils.data import DataLoader, TensorDataset from pytorch_lightning import LightningModule +from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin +from pytorch_lightning.plugins.precision.double import DoublePrecisionPlugin from test.pytorch.utils._train_torch_lightning import create_data_loader, data_transform from test.pytorch.utils._train_torch_lightning import train_with_linear_top_layer from torch import nn -import torchmetrics from bigdl.nano.pytorch import Trainer from 
bigdl.nano.pytorch.vision.models import vision @@ -78,6 +80,20 @@ def test_trainer_compile(self): pl_model = Trainer.compile(self.model, self.loss, self.optimizer) trainer.fit(pl_model, self.train_loader) + def test_trainer_precision_bf16(self): + model = ResNet18(10, pretrained=False, include_top=False, freeze=True) + loss = nn.CrossEntropyLoss() + optimizer = torch.optim.Adam(model.parameters(), lr=0.01) + trainer = Trainer(max_epochs=1, precision='bf16') + pl_model = Trainer.compile(model, loss, optimizer) + trainer.fit(pl_model, self.train_loader) + assert isinstance(trainer.strategy.precision_plugin, NativeMixedPrecisionPlugin) + # model is not converted to bfloat16 precision + input = TensorDataset(torch.rand(1, 3, 32, 32)) + train_loader = DataLoader(input) + y_hat = trainer.predict(pl_model, train_loader) + assert y_hat[0].dtype is torch.bfloat16 + def test_trainer_save_load(self): trainer = Trainer(max_epochs=1) pl_model = Trainer.compile(self.model, self.loss, self.optimizer) @@ -96,7 +112,7 @@ def test_trainer_save_load(self): for k in original_state_dict.keys(): assert (original_state_dict[k] == loaded_state_dict[k]).all() shutil.rmtree('saved_model') - + if __name__ == '__main__': pytest.main([__file__]) diff --git a/python/nano/test/pytorch/tests/test_trainer_ipex.py b/python/nano/test/pytorch/tests/test_trainer_ipex.py index 2f8150242da..2731ddedceb 100644 --- a/python/nano/test/pytorch/tests/test_trainer_ipex.py +++ b/python/nano/test/pytorch/tests/test_trainer_ipex.py @@ -15,6 +15,7 @@ # +from gc import callbacks import os from unittest import TestCase @@ -22,6 +23,7 @@ import torch from torch.optim.lr_scheduler import OneCycleLR from test.pytorch.utils._train_torch_lightning import create_data_loader, data_transform +from test.pytorch.utils._train_ipex_callback import CheckIPEXFusedStepCallback from torch import nn from bigdl.nano.pytorch import Trainer @@ -68,7 +70,8 @@ def test_trainer_save_checkpoint(self): trainer.fit(pl_model, self.train_loader) def test_trainer_ipex_bf16(self): - trainer = Trainer(max_epochs=max_epochs, use_ipex=True, enable_bf16=True) + trainer = Trainer(max_epochs=max_epochs, use_ipex=True, precision="bf16", + callbacks=[CheckIPEXFusedStepCallback()]) # use_ipex=True will perform inplace optimization model = ResNet18(10, pretrained=False, include_top=False, freeze=True) @@ -87,14 +90,16 @@ def test_trainer_ipex_bf16(self): pl_model = Trainer.compile(model, loss, optimizer, scheduler_dict) trainer.fit(pl_model, self.train_loader) trainer.test(pl_model, self.train_loader) - + if TORCH_VERSION_LESS_1_10: import intel_pytorch_extension as ipex + # Diable IPEX AMP # Avoid affecting other tests ipex.enable_auto_mixed_precision(None) def test_trainer_ipex_bf16_unspport_optim(self): - trainer = Trainer(max_epochs=max_epochs, use_ipex=True, enable_bf16=True) + trainer = Trainer(max_epochs=max_epochs, use_ipex=True, precision="bf16", + callbacks=[CheckIPEXFusedStepCallback()]) model = ResNet18(10, pretrained=False, include_top=False, freeze=True) optimizer = torch.optim.AdamW(model.parameters(), lr=0.01, weight_decay=5e-4) @@ -115,6 +120,7 @@ def test_trainer_ipex_bf16_unspport_optim(self): if TORCH_VERSION_LESS_1_10: import intel_pytorch_extension as ipex + # Diable IPEX AMP # Avoid affecting other tests ipex.enable_auto_mixed_precision(None) diff --git a/python/nano/test/pytorch/utils/_train_ipex_callback.py b/python/nano/test/pytorch/utils/_train_ipex_callback.py index 2dd36b59c07..3379c5373ac 100644 --- 
a/python/nano/test/pytorch/utils/_train_ipex_callback.py +++ b/python/nano/test/pytorch/utils/_train_ipex_callback.py @@ -17,6 +17,7 @@ import torch import warnings from typing import Dict +import pytorch_lightning as pl from pytorch_lightning.callbacks import Callback from pytorch_lightning.plugins.training_type import SingleDevicePlugin, DDPSpawnPlugin from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 @@ -68,3 +69,16 @@ def check_ipex_layers(m): return False assert check_ipex_layers(pl_module) + +class CheckIPEXFusedStepCallback(Callback): + def on_train_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule"): + if not TORCH_VERSION_LESS_1_10: + from intel_extension_for_pytorch.optim._optimizer_utils import IPEX_FUSED_OPTIMIZER_LIST + # IPEX only support one optimizer + opt = trainer.optimizers[0] + if type(opt) in IPEX_FUSED_OPTIMIZER_LIST: + assert opt.fused # type: ignore + else: + # Check non-fused step + assert hasattr(opt, '_original_step') + assert getattr(opt, 'step') is not getattr(type(opt), 'step') From ee8ce0f23fd9502148e88281a7fe336bb0ccf526 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Fri, 12 Aug 2022 00:26:30 +0000 Subject: [PATCH 11/32] Update --- .../pytorch/strategies/ipex/ipex_strategy.py | 2 +- .../src/bigdl/nano/pytorch/trainer/Trainer.py | 3 ++- .../nano/test/pytorch/tests/test_trainer.py | 23 ++++++++++++------- .../test/pytorch/tests/test_trainer_ipex.py | 1 - .../pytorch/utils/_train_ipex_callback.py | 20 +++++++++------- 5 files changed, 30 insertions(+), 19 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py index 4e583d09b50..3ce2b32d952 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py @@ -84,7 +84,7 @@ class IPEXBF16Precision(PrecisionPlugin): @contextmanager def forward_context(self): """AMP for managing model forward/training_step/evaluation_step/predict_step.""" - # Using IPEX bf16 and torch.autocast(...) will raise a segmentation fault + # Using IPEX bf16 and torch.autocast(...) reports a segmentation fault # in PyTorch 1.11. # torch.autocast("cpu", args...) is equivalent to torch.cpu.amp.autocast(args...) # in PyTorch 1.12. 
diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index ee6d3fd81a8..dbf90578c98 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -113,8 +113,9 @@ def __init__(self, num_processes: int = 1, enable_bf16 = False if self.use_ipex and kwargs.get('precision', None) == "bf16": - # No need to set precision to 32, because Strategy > Accelerator/precision/plugins enable_bf16 = True + if TORCH_VERSION_LESS_1_10: + kwargs['precision'] = 32 if num_processes == 1: from bigdl.nano.pytorch.strategies import create_IPEXStrategy diff --git a/python/nano/test/pytorch/tests/test_trainer.py b/python/nano/test/pytorch/tests/test_trainer.py index 824714cbb3c..ceb0e513c4d 100644 --- a/python/nano/test/pytorch/tests/test_trainer.py +++ b/python/nano/test/pytorch/tests/test_trainer.py @@ -32,6 +32,7 @@ from bigdl.nano.pytorch import Trainer from bigdl.nano.pytorch.vision.models import vision +from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 batch_size = 256 num_workers = 0 @@ -84,15 +85,21 @@ def test_trainer_precision_bf16(self): model = ResNet18(10, pretrained=False, include_top=False, freeze=True) loss = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=0.01) - trainer = Trainer(max_epochs=1, precision='bf16') pl_model = Trainer.compile(model, loss, optimizer) - trainer.fit(pl_model, self.train_loader) - assert isinstance(trainer.strategy.precision_plugin, NativeMixedPrecisionPlugin) - # model is not converted to bfloat16 precision - input = TensorDataset(torch.rand(1, 3, 32, 32)) - train_loader = DataLoader(input) - y_hat = trainer.predict(pl_model, train_loader) - assert y_hat[0].dtype is torch.bfloat16 + if TORCH_VERSION_LESS_1_10: + trainer = Trainer(max_epochs=1, precision='bf16') + trainer.fit(pl_model, self.train_loader) + assert isinstance(trainer.strategy.precision_plugin, NativeMixedPrecisionPlugin) + # model is not converted to bfloat16 precision + input = TensorDataset(torch.rand(1, 3, 32, 32)) + train_loader = DataLoader(input) + y_hat = trainer.predict(pl_model, train_loader) + assert y_hat[0].dtype is torch.bfloat16 + else: + trainer = Trainer(max_epochs=1, precision=64) + trainer.fit(pl_model, self.train_loader) + assert isinstance(trainer.strategy.precision_plugin, DoublePrecisionPlugin) + assert optimizer.param_groups[0]['params'][0].dtype is torch.float64 def test_trainer_save_load(self): trainer = Trainer(max_epochs=1) diff --git a/python/nano/test/pytorch/tests/test_trainer_ipex.py b/python/nano/test/pytorch/tests/test_trainer_ipex.py index 2731ddedceb..897397a08cc 100644 --- a/python/nano/test/pytorch/tests/test_trainer_ipex.py +++ b/python/nano/test/pytorch/tests/test_trainer_ipex.py @@ -15,7 +15,6 @@ # -from gc import callbacks import os from unittest import TestCase diff --git a/python/nano/test/pytorch/utils/_train_ipex_callback.py b/python/nano/test/pytorch/utils/_train_ipex_callback.py index 3379c5373ac..684289c8606 100644 --- a/python/nano/test/pytorch/utils/_train_ipex_callback.py +++ b/python/nano/test/pytorch/utils/_train_ipex_callback.py @@ -26,11 +26,12 @@ class CheckIPEXCallback(Callback): def on_train_start(self, trainer, pl_module): - if trainer.use_ipex == False: - warnings.warn("CheckIPEXCallback is used, but ipex is disabled. ") - return + if not trainer.use_ipex: + warnings.warn("CheckIPEXCallback is used, but ipex is disabled. 
") + return if TORCH_VERSION_LESS_1_10: from bigdl.nano.deps.ipex.version_1_9.ipex_torchfunctional import RESTORE_TYPE + def check_device(obj): if torch.is_tensor(obj): if obj.device.type == 'xpu': @@ -46,15 +47,18 @@ def check_device(obj): assert check_device(pl_module.state_dict()) else: from intel_extension_for_pytorch.nn.utils._model_convert import _LSTM - from intel_extension_for_pytorch.nn.utils._weight_prepack import _IPEXConvNd, _IPEXLinear, _IPEXConvTransposeNd + from intel_extension_for_pytorch.nn.utils._weight_prepack import (_IPEXConvNd, + _IPEXLinear, + _IPEXConvTransposeNd) IPEX_LAYERS = (_LSTM, _IPEXConvNd, _IPEXLinear, _IPEXConvTransposeNd) - IPEX_ATTR = ('master_weight', - 'weight_trail', - 'master_bias', - 'bias_trail') + IPEX_ATTR = ('master_weight', + 'weight_trail', + 'master_bias', + 'bias_trail') + def check_ipex_layers(m): if isinstance(m, IPEX_LAYERS): print("model is optimized by IPEX") From 8798671664904c602198df985b7f08ae4ab57dfa Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Fri, 12 Aug 2022 00:55:29 +0000 Subject: [PATCH 12/32] Update --- python/nano/test/pytorch/tests/test_trainer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/python/nano/test/pytorch/tests/test_trainer.py b/python/nano/test/pytorch/tests/test_trainer.py index ceb0e513c4d..ae8b869ff05 100644 --- a/python/nano/test/pytorch/tests/test_trainer.py +++ b/python/nano/test/pytorch/tests/test_trainer.py @@ -87,6 +87,11 @@ def test_trainer_precision_bf16(self): optimizer = torch.optim.Adam(model.parameters(), lr=0.01) pl_model = Trainer.compile(model, loss, optimizer) if TORCH_VERSION_LESS_1_10: + trainer = Trainer(max_epochs=1, precision=64) + trainer.fit(pl_model, self.train_loader) + assert isinstance(trainer.strategy.precision_plugin, DoublePrecisionPlugin) + assert optimizer.param_groups[0]['params'][0].dtype is torch.float64 + else: trainer = Trainer(max_epochs=1, precision='bf16') trainer.fit(pl_model, self.train_loader) assert isinstance(trainer.strategy.precision_plugin, NativeMixedPrecisionPlugin) @@ -95,11 +100,6 @@ def test_trainer_precision_bf16(self): train_loader = DataLoader(input) y_hat = trainer.predict(pl_model, train_loader) assert y_hat[0].dtype is torch.bfloat16 - else: - trainer = Trainer(max_epochs=1, precision=64) - trainer.fit(pl_model, self.train_loader) - assert isinstance(trainer.strategy.precision_plugin, DoublePrecisionPlugin) - assert optimizer.param_groups[0]['params'][0].dtype is torch.float64 def test_trainer_save_load(self): trainer = Trainer(max_epochs=1) From 152da313c89ff3a7ac8ed49671c369ce5b519142 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 00:10:00 +0000 Subject: [PATCH 13/32] Update --- .../nano/pytorch/strategies/ipex/ipex_strategy.py | 12 +++++++----- .../nano/src/bigdl/nano/pytorch/trainer/Trainer.py | 3 +++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py index 3ce2b32d952..28166f72e8f 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py @@ -88,7 +88,7 @@ def forward_context(self): # in PyTorch 1.11. # torch.autocast("cpu", args...) is equivalent to torch.cpu.amp.autocast(args...) # in PyTorch 1.12. 
- with torch.cpu.amp.autocast(dtype=torch.bfloat16): + with torch.cpu.amp.autocast(): yield def optimizer_step(self, @@ -109,13 +109,15 @@ def optimizer_step(self, if isinstance(optimizer, LBFGS): invalidInputError(False, "IPEX BFloat16 and the LBFGS optimizer are not compatible " - f"(optimizer {optimizer_idx}") + f"(optimizer {optimizer_idx}", + "Hint: Set 'use_ipex' to False or not set 'precision' to 'bf16'" + " if LBFGS optimizer is necessary") # Detect custom optimzer if type(optimizer).__name__ not in dir(torch.optim): - warning("Closure use in optimizer.step(...) is not currently supported" - " if IPEX and BFloat16 are enabled.") - + warning("Seems like you are using a custom optimizer," + "please make sure that 'optimizer.step(closure)'" + " does not need to be called in training stage") closure_result = closure() optimizer.step(**kwargs) diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index dbf90578c98..9f70187267f 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -112,8 +112,11 @@ def __init__(self, num_processes: int = 1, enable_bf16 = False + # enable precision plugin for IPEX BF16 if self.use_ipex and kwargs.get('precision', None) == "bf16": enable_bf16 = True + # No need to set `precision` because strategy has higher priority + # than accelerator/plugin if TORCH_VERSION_LESS_1_10: kwargs['precision'] = 32 From 8468c52cb124f9adf91f4890e759426a7248998d Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 04:44:35 +0000 Subject: [PATCH 14/32] Update --- .../nano/test/pytorch/tests/test_trainer.py | 23 ------- .../pytorch/tests/test_trainer_precision.py | 67 +++++++++++++++++++ 2 files changed, 67 insertions(+), 23 deletions(-) create mode 100644 python/nano/test/pytorch/tests/test_trainer_precision.py diff --git a/python/nano/test/pytorch/tests/test_trainer.py b/python/nano/test/pytorch/tests/test_trainer.py index ae8b869ff05..d66d6faceea 100644 --- a/python/nano/test/pytorch/tests/test_trainer.py +++ b/python/nano/test/pytorch/tests/test_trainer.py @@ -22,10 +22,7 @@ import pytest import torch -from torch.utils.data import DataLoader, TensorDataset from pytorch_lightning import LightningModule -from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin -from pytorch_lightning.plugins.precision.double import DoublePrecisionPlugin from test.pytorch.utils._train_torch_lightning import create_data_loader, data_transform from test.pytorch.utils._train_torch_lightning import train_with_linear_top_layer from torch import nn @@ -81,26 +78,6 @@ def test_trainer_compile(self): pl_model = Trainer.compile(self.model, self.loss, self.optimizer) trainer.fit(pl_model, self.train_loader) - def test_trainer_precision_bf16(self): - model = ResNet18(10, pretrained=False, include_top=False, freeze=True) - loss = nn.CrossEntropyLoss() - optimizer = torch.optim.Adam(model.parameters(), lr=0.01) - pl_model = Trainer.compile(model, loss, optimizer) - if TORCH_VERSION_LESS_1_10: - trainer = Trainer(max_epochs=1, precision=64) - trainer.fit(pl_model, self.train_loader) - assert isinstance(trainer.strategy.precision_plugin, DoublePrecisionPlugin) - assert optimizer.param_groups[0]['params'][0].dtype is torch.float64 - else: - trainer = Trainer(max_epochs=1, precision='bf16') - trainer.fit(pl_model, self.train_loader) - assert isinstance(trainer.strategy.precision_plugin, NativeMixedPrecisionPlugin) - # model is 
not converted to bfloat16 precision - input = TensorDataset(torch.rand(1, 3, 32, 32)) - train_loader = DataLoader(input) - y_hat = trainer.predict(pl_model, train_loader) - assert y_hat[0].dtype is torch.bfloat16 - def test_trainer_save_load(self): trainer = Trainer(max_epochs=1) pl_model = Trainer.compile(self.model, self.loss, self.optimizer) diff --git a/python/nano/test/pytorch/tests/test_trainer_precision.py b/python/nano/test/pytorch/tests/test_trainer_precision.py new file mode 100644 index 00000000000..790304f83d6 --- /dev/null +++ b/python/nano/test/pytorch/tests/test_trainer_precision.py @@ -0,0 +1,67 @@ +import os +from unittest import TestCase + +import pytest +import torch +from torch import nn +from torch.utils.data import DataLoader, TensorDataset +from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin +from pytorch_lightning.plugins.precision.double import DoublePrecisionPlugin + +from bigdl.nano.pytorch import Trainer +from bigdl.nano.pytorch.vision.models import vision +from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 + +from test.pytorch.tests.test_scale_lr import ResNetBase +from test.pytorch.utils._train_torch_lightning import (create_data_loader, + create_test_data_loader, + data_transform) + +batch_size = 32 +dataset_size = 256 +num_workers = 0 +data_dir = os.path.join(os.path.dirname(__file__), "../data") + + +class ResNet18(nn.Module): + def __init__(self, num_classes, pretrained=True, include_top=False, freeze=True): + super().__init__() + backbone = vision.resnet18(pretrained=pretrained, include_top=include_top, freeze=freeze) + output_size = backbone.get_output_size() + head = nn.Linear(output_size, num_classes) + self.model = nn.Sequential(backbone, head) + + def forward(self, x): + return self.model(x) + + +class TestTrainer(TestCase): + train_loader = create_data_loader(data_dir, batch_size, num_workers, + data_transform, dataset_size) + test_loader = create_test_data_loader(data_dir, batch_size, num_workers, + data_transform, dataset_size) + + def test_trainer_precision(self): + model = ResNet18(10, pretrained=False, include_top=False, freeze=True) + loss = nn.CrossEntropyLoss() + optimizer = torch.optim.Adam(model.parameters(), lr=0.01) + pl_model = Trainer.compile(model, loss, optimizer) + if TORCH_VERSION_LESS_1_10: + trainer = Trainer(max_epochs=4, precision=64) + trainer.fit(pl_model, self.train_loader) + assert isinstance(trainer.strategy.precision_plugin, DoublePrecisionPlugin) + for opt in pl_model.optimizers(): + assert opt.param_groups[0]['params'][0].dtype is torch.float64 + else: + trainer = Trainer(max_epochs=4, precision='bf16') + trainer.fit(pl_model, self.train_loader) + assert isinstance(trainer.strategy.precision_plugin, NativeMixedPrecisionPlugin) + # model is not converted to bfloat16 precision + input = TensorDataset(torch.rand(1, 3, 32, 32)) + train_loader = DataLoader(input) + y_hat = trainer.predict(pl_model, train_loader) + assert y_hat[0].dtype is torch.bfloat16 + + +if __name__ == '__main__': + pytest.main([__file__]) From d58f61657d5f9974063ea982213e40bea8d9ad94 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 04:47:33 +0000 Subject: [PATCH 15/32] Update --- .../pytorch/tests/test_trainer_precision.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/python/nano/test/pytorch/tests/test_trainer_precision.py b/python/nano/test/pytorch/tests/test_trainer_precision.py index 790304f83d6..9ebaa291b7a 100644 --- 
a/python/nano/test/pytorch/tests/test_trainer_precision.py +++ b/python/nano/test/pytorch/tests/test_trainer_precision.py @@ -1,3 +1,20 @@ +# +# Copyright 2016 The BigDL Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + import os from unittest import TestCase From 4f0b04cc01ed1b43811bc7411f4e512b87a4e943 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 05:19:52 +0000 Subject: [PATCH 16/32] Update --- python/nano/test/pytorch/tests/test_trainer_precision.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/nano/test/pytorch/tests/test_trainer_precision.py b/python/nano/test/pytorch/tests/test_trainer_precision.py index 9ebaa291b7a..31e7ad7ca4a 100644 --- a/python/nano/test/pytorch/tests/test_trainer_precision.py +++ b/python/nano/test/pytorch/tests/test_trainer_precision.py @@ -67,8 +67,8 @@ def test_trainer_precision(self): trainer = Trainer(max_epochs=4, precision=64) trainer.fit(pl_model, self.train_loader) assert isinstance(trainer.strategy.precision_plugin, DoublePrecisionPlugin) - for opt in pl_model.optimizers(): - assert opt.param_groups[0]['params'][0].dtype is torch.float64 + opt = pl_model.optimizers() + assert opt.param_groups[0]['params'][0].dtype is torch.float64 else: trainer = Trainer(max_epochs=4, precision='bf16') trainer.fit(pl_model, self.train_loader) From df09ca691b9f8d46482556d05ac650fdbbafe6e4 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 06:36:17 +0000 Subject: [PATCH 17/32] Update --- python/nano/test/pytorch/tests/test_plugin_ipex.py | 5 +++++ python/nano/test/pytorch/tests/test_trainer.py | 4 ++-- python/nano/test/pytorch/tests/test_trainer_ipex.py | 7 +++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/python/nano/test/pytorch/tests/test_plugin_ipex.py b/python/nano/test/pytorch/tests/test_plugin_ipex.py index a1826c15d00..31b6c3659f6 100644 --- a/python/nano/test/pytorch/tests/test_plugin_ipex.py +++ b/python/nano/test/pytorch/tests/test_plugin_ipex.py @@ -24,6 +24,8 @@ from bigdl.nano.pytorch.lightning import LightningModule from bigdl.nano.pytorch import Trainer +from bigdl.nano.common import check_avx512 +from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 from test.pytorch.utils._train_torch_lightning import create_data_loader, data_transform from test.pytorch.utils._train_torch_lightning import create_test_data_loader @@ -65,6 +67,9 @@ def test_trainer_subprocess_plugin(self): trainer.test(pl_model, self.test_data_loader) def test_trainer_subprocess_plugin_bf16(self): + # IPEX BF16 weight prepack needs the cpu support avx512bw, avx512vl and avx512dq + if not TORCH_VERSION_LESS_1_10 and not check_avx512(): + return model = ResNet18(pretrained=False, include_top=False, freeze=True) loss = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=0.01) diff --git a/python/nano/test/pytorch/tests/test_trainer.py b/python/nano/test/pytorch/tests/test_trainer.py index d66d6faceea..614c44ef348 100644 --- 
a/python/nano/test/pytorch/tests/test_trainer.py +++ b/python/nano/test/pytorch/tests/test_trainer.py @@ -26,10 +26,10 @@ from test.pytorch.utils._train_torch_lightning import create_data_loader, data_transform from test.pytorch.utils._train_torch_lightning import train_with_linear_top_layer from torch import nn +import torchmetrics from bigdl.nano.pytorch import Trainer from bigdl.nano.pytorch.vision.models import vision -from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 batch_size = 256 num_workers = 0 @@ -96,7 +96,7 @@ def test_trainer_save_load(self): for k in original_state_dict.keys(): assert (original_state_dict[k] == loaded_state_dict[k]).all() shutil.rmtree('saved_model') - + if __name__ == '__main__': pytest.main([__file__]) diff --git a/python/nano/test/pytorch/tests/test_trainer_ipex.py b/python/nano/test/pytorch/tests/test_trainer_ipex.py index 897397a08cc..f608ba32985 100644 --- a/python/nano/test/pytorch/tests/test_trainer_ipex.py +++ b/python/nano/test/pytorch/tests/test_trainer_ipex.py @@ -28,6 +28,7 @@ from bigdl.nano.pytorch import Trainer from bigdl.nano.pytorch.vision.models import vision from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 +from bigdl.nano.common import check_avx512 batch_size = 256 max_epochs = 2 @@ -69,6 +70,9 @@ def test_trainer_save_checkpoint(self): trainer.fit(pl_model, self.train_loader) def test_trainer_ipex_bf16(self): + # IPEX BF16 weight prepack needs the cpu support avx512bw, avx512vl and avx512dq + if not TORCH_VERSION_LESS_1_10 and not check_avx512(): + return trainer = Trainer(max_epochs=max_epochs, use_ipex=True, precision="bf16", callbacks=[CheckIPEXFusedStepCallback()]) @@ -97,6 +101,9 @@ def test_trainer_ipex_bf16(self): ipex.enable_auto_mixed_precision(None) def test_trainer_ipex_bf16_unspport_optim(self): + # IPEX BF16 weight prepack needs the cpu support avx512bw, avx512vl and avx512dq + if not TORCH_VERSION_LESS_1_10 and not check_avx512(): + return trainer = Trainer(max_epochs=max_epochs, use_ipex=True, precision="bf16", callbacks=[CheckIPEXFusedStepCallback()]) From a0c7e33b0745103bce5c1e9453e1006a4ae57b46 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 07:08:59 +0000 Subject: [PATCH 18/32] Update --- python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py | 4 ++++ python/nano/test/pytorch/tests/test_plugin_ipex.py | 2 +- python/nano/test/pytorch/tests/test_trainer_ipex.py | 4 ++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index 9f70187267f..5827a7855f3 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -107,6 +107,10 @@ def __init__(self, num_processes: int = 1, " without avx512 will crash." 
"Fall back to regular pytorch.") use_ipex = False + # torch must be greater or equal to 1.10 to use bfloat16 without ipex + if TORCH_VERSION_LESS_1_10: + if kwargs.get('precision', None) == "bf16": + kwargs['precision'] = 32 self.use_ipex = use_ipex diff --git a/python/nano/test/pytorch/tests/test_plugin_ipex.py b/python/nano/test/pytorch/tests/test_plugin_ipex.py index 31b6c3659f6..cb2aa663639 100644 --- a/python/nano/test/pytorch/tests/test_plugin_ipex.py +++ b/python/nano/test/pytorch/tests/test_plugin_ipex.py @@ -68,7 +68,7 @@ def test_trainer_subprocess_plugin(self): def test_trainer_subprocess_plugin_bf16(self): # IPEX BF16 weight prepack needs the cpu support avx512bw, avx512vl and avx512dq - if not TORCH_VERSION_LESS_1_10 and not check_avx512(): + if not check_avx512(): return model = ResNet18(pretrained=False, include_top=False, freeze=True) loss = nn.CrossEntropyLoss() diff --git a/python/nano/test/pytorch/tests/test_trainer_ipex.py b/python/nano/test/pytorch/tests/test_trainer_ipex.py index f608ba32985..aa7178dc415 100644 --- a/python/nano/test/pytorch/tests/test_trainer_ipex.py +++ b/python/nano/test/pytorch/tests/test_trainer_ipex.py @@ -71,7 +71,7 @@ def test_trainer_save_checkpoint(self): def test_trainer_ipex_bf16(self): # IPEX BF16 weight prepack needs the cpu support avx512bw, avx512vl and avx512dq - if not TORCH_VERSION_LESS_1_10 and not check_avx512(): + if not check_avx512(): return trainer = Trainer(max_epochs=max_epochs, use_ipex=True, precision="bf16", callbacks=[CheckIPEXFusedStepCallback()]) @@ -102,7 +102,7 @@ def test_trainer_ipex_bf16(self): def test_trainer_ipex_bf16_unspport_optim(self): # IPEX BF16 weight prepack needs the cpu support avx512bw, avx512vl and avx512dq - if not TORCH_VERSION_LESS_1_10 and not check_avx512(): + if not check_avx512(): return trainer = Trainer(max_epochs=max_epochs, use_ipex=True, precision="bf16", callbacks=[CheckIPEXFusedStepCallback()]) From 3614bd5f731b31ebdb9fc12002f3e3e2db96160a Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 09:27:21 +0000 Subject: [PATCH 19/32] Update --- .../src/bigdl/nano/pytorch/trainer/Trainer.py | 37 +++++++++---------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index 5827a7855f3..40126b98977 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -102,27 +102,24 @@ def __init__(self, num_processes: int = 1, else: kwargs["callbacks"] = [ChannelsLastCallback()] - if TORCH_VERSION_LESS_1_11 and use_ipex and not check_avx512(): - warning("Enable ipex<=1.10 in a cpu instruction set" - " without avx512 will crash." 
- "Fall back to regular pytorch.") - use_ipex = False - # torch must be greater or equal to 1.10 to use bfloat16 without ipex - if TORCH_VERSION_LESS_1_10: - if kwargs.get('precision', None) == "bf16": - kwargs['precision'] = 32 - self.use_ipex = use_ipex - - enable_bf16 = False - - # enable precision plugin for IPEX BF16 - if self.use_ipex and kwargs.get('precision', None) == "bf16": - enable_bf16 = True - # No need to set `precision` because strategy has higher priority - # than accelerator/plugin - if TORCH_VERSION_LESS_1_10: - kwargs['precision'] = 32 + enable_bf16 = self.use_ipex and kwargs.get('precision', None) == 'bf16' + + if self.use_ipex and not check_avx512(): + if TORCH_VERSION_LESS_1_11: + warning("Enable ipex<=1.10 in a cpu instruction set" + " without avx512 will crash." + "Fall back to regular pytorch.") + self.use_ipex = False + if TORCH_VERSION_LESS_1_10 and enable_bf16: + warning("torch must be greater or equal to 1.10 to use bfloat16 without ipex." + "Will use 32-bit precision") + kwargs['precision'] = 32 + elif enable_bf16: + warning("Enable IPEX bfloat16 in a cpu instruction set" + " without avx512 will crash. " + "Will use PyTorch Lightning BFloat16 Mixed Precision") + enable_bf16 = False if num_processes == 1: from bigdl.nano.pytorch.strategies import create_IPEXStrategy From 61f1d2a293548304c72a1fabbb06215e392cfcd6 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 09:48:13 +0000 Subject: [PATCH 20/32] Update --- python/nano/test/pytorch/tests/test_bf16_ipex.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/nano/test/pytorch/tests/test_bf16_ipex.py b/python/nano/test/pytorch/tests/test_bf16_ipex.py index 38e45ce165d..7eb27feb3d1 100644 --- a/python/nano/test/pytorch/tests/test_bf16_ipex.py +++ b/python/nano/test/pytorch/tests/test_bf16_ipex.py @@ -20,6 +20,7 @@ from torchvision.models.resnet import resnet18 from unittest.mock import MagicMock, Mock, PropertyMock, patch from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10, TORCH_VERSION_LESS_1_12 +from bigdl.nano.common import check_avx512 class Pytorch1_9: @@ -40,6 +41,8 @@ def test_bf16_common(self): """ Debug mode. Allow run bf16 forward without bf16 instruction support. """ + if not check_avx512(): + return trainer = Trainer(max_epochs=1) model = resnet18(num_classes=10) From 3b957a4b69248eb71adf90d9b7a7fa4a5d95c757 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 09:49:22 +0000 Subject: [PATCH 21/32] Update --- python/nano/test/pytorch/tests/test_bf16_ipex.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/nano/test/pytorch/tests/test_bf16_ipex.py b/python/nano/test/pytorch/tests/test_bf16_ipex.py index 7eb27feb3d1..38e45ce165d 100644 --- a/python/nano/test/pytorch/tests/test_bf16_ipex.py +++ b/python/nano/test/pytorch/tests/test_bf16_ipex.py @@ -20,7 +20,6 @@ from torchvision.models.resnet import resnet18 from unittest.mock import MagicMock, Mock, PropertyMock, patch from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10, TORCH_VERSION_LESS_1_12 -from bigdl.nano.common import check_avx512 class Pytorch1_9: @@ -41,8 +40,6 @@ def test_bf16_common(self): """ Debug mode. Allow run bf16 forward without bf16 instruction support. 
""" - if not check_avx512(): - return trainer = Trainer(max_epochs=1) model = resnet18(num_classes=10) From d055eea2e62aa4d9b9fd0fa1430cbf507caeb018 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 10:52:13 +0000 Subject: [PATCH 22/32] Update --- .../nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py | 7 ++++++- python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py | 9 +++++---- python/nano/test/pytorch/tests/test_plugin_ipex.py | 5 +++++ 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py b/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py index 94e10ff30a3..c476efe5428 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py @@ -180,7 +180,12 @@ def __init__( if use_ipex and TORCH_VERSION_LESS_1_10 and 'accelerator' not in kwargs: super().__init__(accelerator=create_IPEXAccelerator(), parallel_devices=parallel_devices, - cluster_environment=cluster_environment, **kwargs) + cluster_environment=cluster_environment, + **kwargs) + if enable_bf16: + import intel_pytorch_extension as ipex + # Automatically mix precision + ipex.enable_auto_mixed_precision(mixed_dtype=torch.bfloat16) elif use_ipex and enable_bf16 and 'precision_plugin' not in kwargs: from bigdl.nano.pytorch.strategies.ipex.ipex_strategy import IPEXBF16Precision super().__init__(parallel_devices=parallel_devices, diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index 40126b98977..6f58a33a978 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -105,16 +105,17 @@ def __init__(self, num_processes: int = 1, self.use_ipex = use_ipex enable_bf16 = self.use_ipex and kwargs.get('precision', None) == 'bf16' + # No need to set precision for torch greater or equal to 1.10, + # because strategy > accelerator/precision/plugin + if TORCH_VERSION_LESS_1_10 and enable_bf16: + kwargs['precision'] = 32 + if self.use_ipex and not check_avx512(): if TORCH_VERSION_LESS_1_11: warning("Enable ipex<=1.10 in a cpu instruction set" " without avx512 will crash." "Fall back to regular pytorch.") self.use_ipex = False - if TORCH_VERSION_LESS_1_10 and enable_bf16: - warning("torch must be greater or equal to 1.10 to use bfloat16 without ipex." - "Will use 32-bit precision") - kwargs['precision'] = 32 elif enable_bf16: warning("Enable IPEX bfloat16 in a cpu instruction set" " without avx512 will crash. 
" diff --git a/python/nano/test/pytorch/tests/test_plugin_ipex.py b/python/nano/test/pytorch/tests/test_plugin_ipex.py index cb2aa663639..13e1b572664 100644 --- a/python/nano/test/pytorch/tests/test_plugin_ipex.py +++ b/python/nano/test/pytorch/tests/test_plugin_ipex.py @@ -82,6 +82,11 @@ def test_trainer_subprocess_plugin_bf16(self): callbacks=[CheckIPEXCallback(), CheckIPEXFusedStepCallback()]) trainer.fit(pl_model, self.data_loader, self.test_data_loader) trainer.test(pl_model, self.test_data_loader) + if TORCH_VERSION_LESS_1_10: + import intel_pytorch_extension as ipex + # Diable IPEX AMP + # Avoid affecting other tests + ipex.enable_auto_mixed_precision(None) if __name__ == '__main__': From 45d8b8bb70d9b1cc932491e60bb04eb7c782717d Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 13:22:40 +0000 Subject: [PATCH 23/32] Update --- python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index 6f58a33a978..fdb34ae6341 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -105,8 +105,8 @@ def __init__(self, num_processes: int = 1, self.use_ipex = use_ipex enable_bf16 = self.use_ipex and kwargs.get('precision', None) == 'bf16' - # No need to set precision for torch greater or equal to 1.10, - # because strategy > accelerator/precision/plugin + # Set 'precision' for strategy without precision_plugin, + # Strategy > accelerator/precision/plugin if TORCH_VERSION_LESS_1_10 and enable_bf16: kwargs['precision'] = 32 From 173021716eab513fc037c251b3f519461863b501 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 13:26:23 +0000 Subject: [PATCH 24/32] Update --- python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index fdb34ae6341..dfcde15193e 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -105,7 +105,7 @@ def __init__(self, num_processes: int = 1, self.use_ipex = use_ipex enable_bf16 = self.use_ipex and kwargs.get('precision', None) == 'bf16' - # Set 'precision' for strategy without precision_plugin, + # Set 'precision' for strategy without precision_plugin, # Strategy > accelerator/precision/plugin if TORCH_VERSION_LESS_1_10 and enable_bf16: kwargs['precision'] = 32 From 27037dc6361180b54b7c6b1820f4fe8a2d35d41d Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Tue, 16 Aug 2022 00:24:29 +0000 Subject: [PATCH 25/32] reduce ut time and re-run action --- .../src/bigdl/nano/pytorch/trainer/Trainer.py | 3 ++- .../test/pytorch/tests/test_plugin_ipex.py | 4 +--- .../test/pytorch/tests/test_trainer_ipex.py | 18 +++++++----------- .../pytorch/tests/test_trainer_precision.py | 5 +++-- .../test/pytorch/utils/_train_ipex_callback.py | 7 ++++++- 5 files changed, 19 insertions(+), 18 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index dfcde15193e..9160ef6f938 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -107,6 +107,7 @@ def __init__(self, num_processes: int = 1, # Set 'precision' for strategy without 
precision_plugin, # Strategy > accelerator/precision/plugin + # torch must be greater or equal to 1.10 to use natice amp for bfloat16 precision if TORCH_VERSION_LESS_1_10 and enable_bf16: kwargs['precision'] = 32 @@ -119,7 +120,7 @@ def __init__(self, num_processes: int = 1, elif enable_bf16: warning("Enable IPEX bfloat16 in a cpu instruction set" " without avx512 will crash. " - "Will use PyTorch Lightning BFloat16 Mixed Precision") + "Will use PyTorch Lightning Native AMP for BFloat16 precision") enable_bf16 = False if num_processes == 1: diff --git a/python/nano/test/pytorch/tests/test_plugin_ipex.py b/python/nano/test/pytorch/tests/test_plugin_ipex.py index 13e1b572664..0228370ef4a 100644 --- a/python/nano/test/pytorch/tests/test_plugin_ipex.py +++ b/python/nano/test/pytorch/tests/test_plugin_ipex.py @@ -68,8 +68,6 @@ def test_trainer_subprocess_plugin(self): def test_trainer_subprocess_plugin_bf16(self): # IPEX BF16 weight prepack needs the cpu support avx512bw, avx512vl and avx512dq - if not check_avx512(): - return model = ResNet18(pretrained=False, include_top=False, freeze=True) loss = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=0.01) @@ -82,7 +80,7 @@ def test_trainer_subprocess_plugin_bf16(self): callbacks=[CheckIPEXCallback(), CheckIPEXFusedStepCallback()]) trainer.fit(pl_model, self.data_loader, self.test_data_loader) trainer.test(pl_model, self.test_data_loader) - if TORCH_VERSION_LESS_1_10: + if trainer.use_ipex and TORCH_VERSION_LESS_1_10: import intel_pytorch_extension as ipex # Diable IPEX AMP # Avoid affecting other tests diff --git a/python/nano/test/pytorch/tests/test_trainer_ipex.py b/python/nano/test/pytorch/tests/test_trainer_ipex.py index aa7178dc415..74f4e23fd0d 100644 --- a/python/nano/test/pytorch/tests/test_trainer_ipex.py +++ b/python/nano/test/pytorch/tests/test_trainer_ipex.py @@ -55,11 +55,11 @@ class TestTrainer(TestCase): optimizer = torch.optim.Adam(model.parameters(), lr=0.01) scheduler_dict = { "scheduler": OneCycleLR( - optimizer, - 0.1, - epochs=max_epochs, - steps_per_epoch=len(train_loader), - ), + optimizer, + 0.1, + epochs=max_epochs, + steps_per_epoch=len(train_loader), + ), "interval": "step", } @@ -71,8 +71,6 @@ def test_trainer_save_checkpoint(self): def test_trainer_ipex_bf16(self): # IPEX BF16 weight prepack needs the cpu support avx512bw, avx512vl and avx512dq - if not check_avx512(): - return trainer = Trainer(max_epochs=max_epochs, use_ipex=True, precision="bf16", callbacks=[CheckIPEXFusedStepCallback()]) @@ -94,7 +92,7 @@ def test_trainer_ipex_bf16(self): trainer.fit(pl_model, self.train_loader) trainer.test(pl_model, self.train_loader) - if TORCH_VERSION_LESS_1_10: + if trainer.use_ipex and TORCH_VERSION_LESS_1_10: import intel_pytorch_extension as ipex # Diable IPEX AMP # Avoid affecting other tests @@ -102,8 +100,6 @@ def test_trainer_ipex_bf16(self): def test_trainer_ipex_bf16_unspport_optim(self): # IPEX BF16 weight prepack needs the cpu support avx512bw, avx512vl and avx512dq - if not check_avx512(): - return trainer = Trainer(max_epochs=max_epochs, use_ipex=True, precision="bf16", callbacks=[CheckIPEXFusedStepCallback()]) @@ -124,7 +120,7 @@ def test_trainer_ipex_bf16_unspport_optim(self): trainer.fit(pl_model, self.train_loader) trainer.test(pl_model, self.train_loader) - if TORCH_VERSION_LESS_1_10: + if trainer.use_ipex and TORCH_VERSION_LESS_1_10: import intel_pytorch_extension as ipex # Diable IPEX AMP # Avoid affecting other tests diff --git 
a/python/nano/test/pytorch/tests/test_trainer_precision.py b/python/nano/test/pytorch/tests/test_trainer_precision.py index 31e7ad7ca4a..d30b2a9c3d9 100644 --- a/python/nano/test/pytorch/tests/test_trainer_precision.py +++ b/python/nano/test/pytorch/tests/test_trainer_precision.py @@ -63,14 +63,15 @@ def test_trainer_precision(self): loss = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=0.01) pl_model = Trainer.compile(model, loss, optimizer) + # torch must be greater or euqal to 1.10 to use native amp for bfloat16 precision if TORCH_VERSION_LESS_1_10: - trainer = Trainer(max_epochs=4, precision=64) + trainer = Trainer(max_epochs=2, precision=64) trainer.fit(pl_model, self.train_loader) assert isinstance(trainer.strategy.precision_plugin, DoublePrecisionPlugin) opt = pl_model.optimizers() assert opt.param_groups[0]['params'][0].dtype is torch.float64 else: - trainer = Trainer(max_epochs=4, precision='bf16') + trainer = Trainer(max_epochs=2, precision='bf16') trainer.fit(pl_model, self.train_loader) assert isinstance(trainer.strategy.precision_plugin, NativeMixedPrecisionPlugin) # model is not converted to bfloat16 precision diff --git a/python/nano/test/pytorch/utils/_train_ipex_callback.py b/python/nano/test/pytorch/utils/_train_ipex_callback.py index 684289c8606..1b972108892 100644 --- a/python/nano/test/pytorch/utils/_train_ipex_callback.py +++ b/python/nano/test/pytorch/utils/_train_ipex_callback.py @@ -20,9 +20,11 @@ import pytorch_lightning as pl from pytorch_lightning.callbacks import Callback from pytorch_lightning.plugins.training_type import SingleDevicePlugin, DDPSpawnPlugin -from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 from pytorch_lightning.accelerators.cpu import CPUAccelerator +from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 +from bigdl.nano.common import check_avx512 + class CheckIPEXCallback(Callback): def on_train_start(self, trainer, pl_module): @@ -76,6 +78,9 @@ def check_ipex_layers(m): class CheckIPEXFusedStepCallback(Callback): def on_train_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule"): + if not check_avx512(): + # IPEX BF16 weight prepack needs the cpu support avx512bw, avx512vl and avx512dq + return if not TORCH_VERSION_LESS_1_10: from intel_extension_for_pytorch.optim._optimizer_utils import IPEX_FUSED_OPTIMIZER_LIST # IPEX only support one optimizer From 070fe97bd690f5fa38ecafe7bf461a4557746557 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Tue, 16 Aug 2022 01:33:49 +0000 Subject: [PATCH 26/32] track avx512 --- python/nano/test/pytorch/tests/test_plugin_ipex.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/nano/test/pytorch/tests/test_plugin_ipex.py b/python/nano/test/pytorch/tests/test_plugin_ipex.py index 0228370ef4a..7d9015aca7d 100644 --- a/python/nano/test/pytorch/tests/test_plugin_ipex.py +++ b/python/nano/test/pytorch/tests/test_plugin_ipex.py @@ -66,7 +66,7 @@ def test_trainer_subprocess_plugin(self): trainer.fit(pl_model, self.data_loader, self.test_data_loader) trainer.test(pl_model, self.test_data_loader) - def test_trainer_subprocess_plugin_bf16(self): + def test_trainer_spawn_plugin_bf16(self): # IPEX BF16 weight prepack needs the cpu support avx512bw, avx512vl and avx512dq model = ResNet18(pretrained=False, include_top=False, freeze=True) loss = nn.CrossEntropyLoss() @@ -75,7 +75,7 @@ def test_trainer_subprocess_plugin_bf16(self): model, loss, optimizer, metrics=[torchmetrics.F1(num_classes), torchmetrics.Accuracy(num_classes=10)] ) - 
trainer = Trainer(num_processes=2, distributed_backend="subprocess", + trainer = Trainer(num_processes=2, distributed_backend="spawn", max_epochs=4, use_ipex=True, precision="bf16", callbacks=[CheckIPEXCallback(), CheckIPEXFusedStepCallback()]) trainer.fit(pl_model, self.data_loader, self.test_data_loader) From c5dd357edce3164820cdc034b73281ff11ea559e Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Wed, 17 Aug 2022 05:29:05 +0000 Subject: [PATCH 27/32] Update lite bf16 training --- .../pytorch/strategies/ipex/ipex_strategy.py | 8 ++++-- .../nano/src/bigdl/nano/pytorch/torch_nano.py | 27 +++++++++++++------ .../src/bigdl/nano/pytorch/trainer/Trainer.py | 2 +- .../pytorch/tests/test_torch_nano_ipex.py | 19 +++++++++++-- 4 files changed, 43 insertions(+), 13 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py index 28166f72e8f..fe8343964dd 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py @@ -27,7 +27,8 @@ import pytorch_lightning as pl from pytorch_lightning.strategies import SingleDeviceStrategy from pytorch_lightning.accelerators.accelerator import Accelerator -from pytorch_lightning.plugins.precision import PrecisionPlugin, NativeMixedPrecisionPlugin +from pytorch_lightning.plugins.precision import PrecisionPlugin, MixedPrecisionPlugin +from pytorch_lightning.utilities import AMPType from bigdl.nano.utils.log4Error import invalidInputError import intel_extension_for_pytorch as ipex @@ -78,9 +79,12 @@ def setup(self, trainer: pl.Trainer) -> None: invalidInputError(False, "Ipex does not support more than one optimizers.") -class IPEXBF16Precision(PrecisionPlugin): +class IPEXBF16Precision(MixedPrecisionPlugin): """Create Precision Plugin for IPEX BFloat16.""" + backend: "AMPType" = AMPType.NATIVE + precision: Union[str, int] = 'bf16' + @contextmanager def forward_context(self): """AMP for managing model forward/training_step/evaluation_step/predict_step.""" diff --git a/python/nano/src/bigdl/nano/pytorch/torch_nano.py b/python/nano/src/bigdl/nano/pytorch/torch_nano.py index eee05e66142..de99d06844a 100644 --- a/python/nano/src/bigdl/nano/pytorch/torch_nano.py +++ b/python/nano/src/bigdl/nano/pytorch/torch_nano.py @@ -52,7 +52,6 @@ class TorchNano(LightningLite): def __init__(self, num_processes: int = 1, use_ipex: bool = False, - enable_bf16: bool = False, strategy: str = "subprocess", *args, **kwargs) -> None: """ @@ -66,13 +65,25 @@ def __init__(self, num_processes: int = 1, """ self.num_processes = num_processes self.use_ipex = use_ipex - self.enable_bf16 = enable_bf16 - - if TORCH_VERSION_LESS_1_11 and use_ipex and not check_avx512(): - warning("Enable ipex<=1.10 in a cpu instruction set" - " without avx512 will crash." - "Fall back to regular pytorch.") - self.use_ipex = False + self.enable_bf16 = self.use_ipex and kwargs.get('precision', None) == 'bf16' + + # Set 'precision' for strategy without precision_plugin, + # Strategy > accelerator/precision/plugin + # torch must be greater or equal to 1.10 to use native amp for bfloat16 precision + if TORCH_VERSION_LESS_1_10 and enable_bf16: + kwargs['precision'] = 32 + + if self.use_ipex and not check_avx512(): + if TORCH_VERSION_LESS_1_11: + warning("Enable ipex<=1.10 in a cpu instruction set" + " without avx512 will crash." 
+ "Fall back to regular pytorch.") + self.use_ipex = False + elif enable_bf16: + warning("Enable IPEX bfloat16 in a cpu instruction set" + " without avx512 will crash. " + "Will use PyTorch Lightning Native AMP for BFloat16 precision") + enable_bf16 = False if self.num_processes == 1: if self.use_ipex: diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index 9160ef6f938..0e32b08a622 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -107,7 +107,7 @@ def __init__(self, num_processes: int = 1, # Set 'precision' for strategy without precision_plugin, # Strategy > accelerator/precision/plugin - # torch must be greater or equal to 1.10 to use natice amp for bfloat16 precision + # torch must be greater or equal to 1.10 to use native amp for bfloat16 precision if TORCH_VERSION_LESS_1_10 and enable_bf16: kwargs['precision'] = 32 diff --git a/python/nano/test/pytorch/tests/test_torch_nano_ipex.py b/python/nano/test/pytorch/tests/test_torch_nano_ipex.py index 3c01e1e639e..8d6b9ac4862 100644 --- a/python/nano/test/pytorch/tests/test_torch_nano_ipex.py +++ b/python/nano/test/pytorch/tests/test_torch_nano_ipex.py @@ -45,10 +45,13 @@ def forward(self, x): class MyNano(TorchNano): - def train(self): + def train(self, optimizer_supported: bool = False): model = ResNet18(10, pretrained=False, include_top=False, freeze=True) loss_func = nn.CrossEntropyLoss() - optimizer = torch.optim.Adam(model.parameters(), lr=0.01) + if optimizer_supported: + optimizer = torch.optim.SGD(model.parameters, lr=0.01) + else: + optimizer = torch.optim.Adam(model.parameters(), lr=0.01) train_loader = create_data_loader(data_dir, batch_size, num_workers, data_transform) model, optimizer, train_loader = self.setup(model, optimizer, train_loader) @@ -132,6 +135,18 @@ def test_torch_nano_spawn_correctness(self): def test_torch_nano_subprocess_correctness(self): MyNanoCorrectness(use_ipex=True, num_processes=2, strategy="subprocess").train(0.5) + def test_torch_nano_bf16_support_opt(self): + MyNano(use_ipex=True, precision='bf16').train(optimizer_supported=True) + + def test_torch_nano_bf16_unsupport_opt(self): + MyNano(use_ipex=True, precision='bf16').train() + + def test_torch_nano_bf16_spawn(self): + MyNano(use_ipex=True, precision='bf16', num_processes=2, strategy="spawn").train() + + def test_torch_nano_bf16_subprocess(self): + MyNano(use_ipex=True, precision='bf16', num_processes=2, strategy="subprocess").train() + if __name__ == '__main__': pytest.main([__file__]) From ff461c7c321f8d616573b708ea0127d8fdaba836 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Wed, 17 Aug 2022 07:27:07 +0000 Subject: [PATCH 28/32] Update --- .../nano/src/bigdl/nano/pytorch/torch_nano.py | 22 +++++++++---------- .../pytorch/tests/test_torch_nano_ipex.py | 3 +-- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/torch_nano.py b/python/nano/src/bigdl/nano/pytorch/torch_nano.py index de99d06844a..4c9e9ab8c36 100644 --- a/python/nano/src/bigdl/nano/pytorch/torch_nano.py +++ b/python/nano/src/bigdl/nano/pytorch/torch_nano.py @@ -59,7 +59,6 @@ def __init__(self, num_processes: int = 1, :param num_processes: number of processes in distributed training, defaults to 1 :param use_ipex: whether use ipex acceleration, defaults to False - :param enable_bf16: whether use bf16 acceleration, defaults to False :param strategy: use which backend in distributed mode, 
defaults to "subprocess", \ now avaiable strategies are 'spawn', 'subprocess' and 'ray' """ @@ -70,7 +69,7 @@ def __init__(self, num_processes: int = 1, # Set 'precision' for strategy without precision_plugin, # Strategy > accelerator/precision/plugin # torch must be greater or equal to 1.10 to use native amp for bfloat16 precision - if TORCH_VERSION_LESS_1_10 and enable_bf16: + if TORCH_VERSION_LESS_1_10 and self.enable_bf16: kwargs['precision'] = 32 if self.use_ipex and not check_avx512(): @@ -79,11 +78,11 @@ def __init__(self, num_processes: int = 1, " without avx512 will crash." "Fall back to regular pytorch.") self.use_ipex = False - elif enable_bf16: + elif self.enable_bf16: warning("Enable IPEX bfloat16 in a cpu instruction set" " without avx512 will crash. " "Will use PyTorch Lightning Native AMP for BFloat16 precision") - enable_bf16 = False + self.enable_bf16 = False if self.num_processes == 1: if self.use_ipex: @@ -129,6 +128,14 @@ def _setup( # so we have to add optimizations in this method, which will be called in # user defined `train()` method. + # the following codes are copied from pl's LightningLite's `setup` method, + # ipex 1.9 requires `_move_model_to_device` after `_setup_model_and_optimizers`, but + # pl's `setup` method calls `_move_model_to_device` before `_setup_model_and_optimizers`, + # so we copy the codes and swap their order. + self._validate_setup(model, optimizers) + + model, optimizers = self._strategy._setup_model_and_optimizers(model, list(optimizers)) + # add IPEX 1.11's optimization if self.use_ipex and not TORCH_VERSION_LESS_1_10: dtype = torch.bfloat16 if self.enable_bf16 else None @@ -139,13 +146,6 @@ def _setup( else: invalidInputError(False, "Ipex does not support more than one optimizers.") - # the following codes are copied from pl's LightningLite's `setup` method, - # ipex 1.9 requires `_move_model_to_device` after `_setup_model_and_optimizers`, but - # pl's `setup` method calls `_move_model_to_device` before `_setup_model_and_optimizers`, - # so we copy the codes and swap their order. 
- self._validate_setup(model, optimizers) - - model, optimizers = self._strategy._setup_model_and_optimizers(model, optimizers) if move_to_device: model = self._move_model_to_device(model=model, optimizers=optimizers) model = _TorchNanoModule(model, self._precision_plugin) diff --git a/python/nano/test/pytorch/tests/test_torch_nano_ipex.py b/python/nano/test/pytorch/tests/test_torch_nano_ipex.py index 8d6b9ac4862..d9c25590a7b 100644 --- a/python/nano/test/pytorch/tests/test_torch_nano_ipex.py +++ b/python/nano/test/pytorch/tests/test_torch_nano_ipex.py @@ -49,7 +49,7 @@ def train(self, optimizer_supported: bool = False): model = ResNet18(10, pretrained=False, include_top=False, freeze=True) loss_func = nn.CrossEntropyLoss() if optimizer_supported: - optimizer = torch.optim.SGD(model.parameters, lr=0.01) + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) else: optimizer = torch.optim.Adam(model.parameters(), lr=0.01) train_loader = create_data_loader(data_dir, batch_size, num_workers, data_transform) @@ -66,7 +66,6 @@ def train(self, optimizer_supported: bool = False): loss = loss_func(model(X), y) self.backward(loss) optimizer.step() - total_loss += loss.sum() num += 1 print(f'avg_loss: {total_loss / num}') From 5bcfab09d15c1c624742b9349da584cacba63541 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Thu, 18 Aug 2022 00:45:24 +0000 Subject: [PATCH 29/32] Update --- python/nano/src/bigdl/nano/pytorch/torch_nano.py | 9 ++++++--- python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/torch_nano.py b/python/nano/src/bigdl/nano/pytorch/torch_nano.py index 4c9e9ab8c36..f83edfcfe54 100644 --- a/python/nano/src/bigdl/nano/pytorch/torch_nano.py +++ b/python/nano/src/bigdl/nano/pytorch/torch_nano.py @@ -66,8 +66,8 @@ def __init__(self, num_processes: int = 1, self.use_ipex = use_ipex self.enable_bf16 = self.use_ipex and kwargs.get('precision', None) == 'bf16' - # Set 'precision' for strategy without precision_plugin, - # Strategy > accelerator/precision/plugin + # Strategy has a higher priority than accelerator/precision/plugin, + # set precision for strategy without precision_plugin(e.g. ddp-spawn, ddp-subprocess) # torch must be greater or equal to 1.10 to use native amp for bfloat16 precision if TORCH_VERSION_LESS_1_10 and self.enable_bf16: kwargs['precision'] = 32 @@ -83,6 +83,7 @@ def __init__(self, num_processes: int = 1, " without avx512 will crash. 
" "Will use PyTorch Lightning Native AMP for BFloat16 precision") self.enable_bf16 = False + kwargs['precision'] = 32 if self.num_processes == 1: if self.use_ipex: @@ -136,7 +137,9 @@ def _setup( model, optimizers = self._strategy._setup_model_and_optimizers(model, list(optimizers)) - # add IPEX 1.11's optimization + # IPEX bfloat16 optimization will cast model parameters to `torch.bfloat16` + # which is not supported by ddp currently, + # so add IPEX 1.11's optimization after `_setup_model` if self.use_ipex and not TORCH_VERSION_LESS_1_10: dtype = torch.bfloat16 if self.enable_bf16 else None if len(optimizers) == 0: diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index 0e32b08a622..bab1adc6332 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -105,8 +105,8 @@ def __init__(self, num_processes: int = 1, self.use_ipex = use_ipex enable_bf16 = self.use_ipex and kwargs.get('precision', None) == 'bf16' - # Set 'precision' for strategy without precision_plugin, - # Strategy > accelerator/precision/plugin + # Strategy has a higher priority than accelerator/precision/plugin, + # set precision for strategy without precision_plugin(e.g. ddp-spawn, ddp-subprocess) # torch must be greater or equal to 1.10 to use native amp for bfloat16 precision if TORCH_VERSION_LESS_1_10 and enable_bf16: kwargs['precision'] = 32 @@ -120,8 +120,11 @@ def __init__(self, num_processes: int = 1, elif enable_bf16: warning("Enable IPEX bfloat16 in a cpu instruction set" " without avx512 will crash. " - "Will use PyTorch Lightning Native AMP for BFloat16 precision") + "Using 32-bit precision") enable_bf16 = False + # IPEX-optimized model is incompatible with PL Native AMP, + # so fall back to 32-bit precision instead of staying at bfloat16 precision + kwargs['precision'] = 32 if num_processes == 1: from bigdl.nano.pytorch.strategies import create_IPEXStrategy From 3fad3ba16f1163c5d76876493735e458e3e09a27 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Tue, 23 Aug 2022 00:30:36 +0000 Subject: [PATCH 30/32] Update bf16 api --- .../bigdl/nano/deps/ray/ray_distributed.py | 10 +++--- .../nano/pytorch/strategies/ddp_spawn.py | 13 ++++--- .../pytorch/strategies/ipex/ipex_strategy.py | 23 ++++++------ .../ipex/version_1_9/ipex_strategy_1_9.py | 4 +-- .../nano/src/bigdl/nano/pytorch/torch_nano.py | 36 +++++++++---------- .../src/bigdl/nano/pytorch/trainer/Trainer.py | 36 ++++++++++--------- 6 files changed, 59 insertions(+), 63 deletions(-) diff --git a/python/nano/src/bigdl/nano/deps/ray/ray_distributed.py b/python/nano/src/bigdl/nano/deps/ray/ray_distributed.py index da16615cb8c..32d7cc7bcb6 100644 --- a/python/nano/src/bigdl/nano/deps/ray/ray_distributed.py +++ b/python/nano/src/bigdl/nano/deps/ray/ray_distributed.py @@ -176,7 +176,7 @@ def __init__(self, num_cpus_per_worker: int = 1, use_gpu: bool = False, use_ipex: bool = False, - enable_bf16: bool = False, + dtype=None, init_hook: Callable = None, auto_lr: Union[bool, dict] = True, **ddp_kwargs: Any): @@ -207,7 +207,7 @@ def __init__(self, self.num_cpus_per_worker = num_cpus_per_worker self.use_gpu = use_gpu self.use_ipex = use_ipex - self.enable_bf16 = enable_bf16 + self.dtype = dtype self.auto_lr = auto_lr invalidInputError(not self.use_gpu or not self.use_ipex, @@ -328,14 +328,12 @@ def _unpack_lightning_optimizer(opt): ] if self.use_ipex and not TORCH_VERSION_LESS_1_10: - dtype = torch.bfloat16 if self.enable_bf16 else 
None num_optimizers = len(self.optimizers) - if num_optimizers == 1: optimizer = self.optimizers[0] - ipex_optimize(self.model, optimizer=optimizer, inplace=True, dtype=dtype) + ipex_optimize(self.model, optimizer=optimizer, inplace=True, dtype=self.dtype) elif num_optimizers == 0: - ipex_optimize(self.model, inplace=True, dtype=dtype) + ipex_optimize(self.model, inplace=True, dtype=self.dtype) else: warnings.warn(f"IPEX currently only support single optimizers, " f"but got {num_optimizers}. Skip IPEX") diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py b/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py index c476efe5428..558e1b58109 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py @@ -168,7 +168,7 @@ def __init__( num_processes: int = 1, cpu_for_each_process: Optional[List[List[int]]] = None, use_ipex=False, - enable_bf16=False, + dtype=None, auto_lr=False, **kwargs: Any ): @@ -182,11 +182,11 @@ def __init__( parallel_devices=parallel_devices, cluster_environment=cluster_environment, **kwargs) - if enable_bf16: + if dtype == torch.bfloat16: import intel_pytorch_extension as ipex # Automatically mix precision ipex.enable_auto_mixed_precision(mixed_dtype=torch.bfloat16) - elif use_ipex and enable_bf16 and 'precision_plugin' not in kwargs: + elif use_ipex and dtype == torch.bfloat16 and 'precision_plugin' not in kwargs: from bigdl.nano.pytorch.strategies.ipex.ipex_strategy import IPEXBF16Precision super().__init__(parallel_devices=parallel_devices, cluster_environment=cluster_environment, @@ -197,7 +197,7 @@ def __init__( self.cpu_for_each_process = cpu_for_each_process self.is_distributed = True self.use_ipex = use_ipex - self.enable_bf16 = enable_bf16 + self.dtype = dtype self.auto_lr = auto_lr def _configure_launcher(self): @@ -259,14 +259,13 @@ def _unpack_lightning_optimizer(opt): ] if self.use_ipex and not TORCH_VERSION_LESS_1_10: - dtype = torch.bfloat16 if self.enable_bf16 else None num_optimizers = len(self.optimizers) if num_optimizers == 1: optimizer = self.optimizers[0] - ipex_optimize(self.model, optimizer=optimizer, inplace=True, dtype=dtype) + ipex_optimize(self.model, optimizer=optimizer, inplace=True, dtype=self.dtype) elif num_optimizers == 0: - ipex_optimize(self.model, inplace=True, dtype=dtype) + ipex_optimize(self.model, inplace=True, dtype=self.dtype) else: warnings.warn(f"IPEX currently only support single optimizers, " f"but got {num_optimizers}. Skip IPEX") diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py index fe8343964dd..9b987f51d2e 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py @@ -27,7 +27,7 @@ import pytorch_lightning as pl from pytorch_lightning.strategies import SingleDeviceStrategy from pytorch_lightning.accelerators.accelerator import Accelerator -from pytorch_lightning.plugins.precision import PrecisionPlugin, MixedPrecisionPlugin +from pytorch_lightning.plugins.precision import PrecisionPlugin from pytorch_lightning.utilities import AMPType from bigdl.nano.utils.log4Error import invalidInputError @@ -46,7 +46,7 @@ def __init__( self, accelerator: Accelerator = IPEXAccelerator(), precision_plugin: PrecisionPlugin = PrecisionPlugin(), - enable_bf16=False, + dtype=None, ) -> None: """ Create a IPEXStrategy. 
@@ -54,9 +54,9 @@ def __init__( :param accelerator: the accelerator to handle hardware :param precision_plugin: the plugin to handle precision-specific parts """ - self.enable_bf16 = enable_bf16 + self.dtype = dtype - if enable_bf16 and isinstance(precision_plugin, PrecisionPlugin): + if self.dtype == torch.bfloat16 and isinstance(precision_plugin, PrecisionPlugin): precision_plugin = IPEXBF16Precision() super().__init__(accelerator=accelerator, precision_plugin=precision_plugin) @@ -70,28 +70,22 @@ def setup(self, trainer: pl.Trainer) -> None: """ super().setup(trainer) - dtype = torch.bfloat16 if self.enable_bf16 else None if len(self.optimizers) == 0: - ipex.optimize(self.model, inplace=True, dtype=dtype) + ipex.optimize(self.model, inplace=True, dtype=self.dtype) elif len(self.optimizers) == 1: - ipex.optimize(self.model, optimizer=self.optimizers[0], inplace=True, dtype=dtype) + ipex.optimize(self.model, optimizer=self.optimizers[0], inplace=True, dtype=self.dtype) else: invalidInputError(False, "Ipex does not support more than one optimizers.") -class IPEXBF16Precision(MixedPrecisionPlugin): +class IPEXBF16Precision(PrecisionPlugin): """Create Precision Plugin for IPEX BFloat16.""" - backend: "AMPType" = AMPType.NATIVE precision: Union[str, int] = 'bf16' @contextmanager def forward_context(self): """AMP for managing model forward/training_step/evaluation_step/predict_step.""" - # Using IPEX bf16 and torch.autocast(...) reports a segmentation fault - # in PyTorch 1.11. - # torch.autocast("cpu", args...) is equivalent to torch.cpu.amp.autocast(args...) - # in PyTorch 1.12. with torch.cpu.amp.autocast(): yield @@ -122,6 +116,9 @@ def optimizer_step(self, warning("Seems like you are using a custom optimizer," "please make sure that 'optimizer.step(closure)'" " does not need to be called in training stage") + + # For optimizer not in IPEX_FUSED_OPTIMIZER_LIST, + # `closure()` needs to be called to backward the loss to avoid `.grad` being None closure_result = closure() optimizer.step(**kwargs) diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/version_1_9/ipex_strategy_1_9.py b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/version_1_9/ipex_strategy_1_9.py index 71d3b72dd44..d7d93a32351 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/version_1_9/ipex_strategy_1_9.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/version_1_9/ipex_strategy_1_9.py @@ -39,7 +39,7 @@ def __init__( self, accelerator: Accelerator = IPEXAccelerator(), # type: ignore precision_plugin: PrecisionPlugin = PrecisionPlugin(), - enable_bf16=False, + dtype=None, ) -> None: """ Create a IPEXStrategy. @@ -47,7 +47,7 @@ def __init__( :param accelerator: the accelerator to handle hardware :param precision_plugin: the plugin to handle precision-specific parts """ - if enable_bf16: + if dtype == torch.bfloat16: # Automatically mix precision ipex.enable_auto_mixed_precision(mixed_dtype=torch.bfloat16) diff --git a/python/nano/src/bigdl/nano/pytorch/torch_nano.py b/python/nano/src/bigdl/nano/pytorch/torch_nano.py index f83edfcfe54..5cbc5ab62cb 100644 --- a/python/nano/src/bigdl/nano/pytorch/torch_nano.py +++ b/python/nano/src/bigdl/nano/pytorch/torch_nano.py @@ -53,6 +53,7 @@ class TorchNano(LightningLite): def __init__(self, num_processes: int = 1, use_ipex: bool = False, strategy: str = "subprocess", + precision: Union[str, int] = 32, *args, **kwargs) -> None: """ Create a TorchNano with nano acceleration. 
@@ -64,44 +65,44 @@ def __init__(self, num_processes: int = 1, """ self.num_processes = num_processes self.use_ipex = use_ipex - self.enable_bf16 = self.use_ipex and kwargs.get('precision', None) == 'bf16' - - # Strategy has a higher priority than accelerator/precision/plugin, - # set precision for strategy without precision_plugin(e.g. ddp-spawn, ddp-subprocess) - # torch must be greater or equal to 1.10 to use native amp for bfloat16 precision - if TORCH_VERSION_LESS_1_10 and self.enable_bf16: - kwargs['precision'] = 32 + self.dtype = None + if self.use_ipex and precision == 'bf16': + # Enable ipex bfloat16 weight prepack and disable native AMP + self.dtype = torch.float16 + precision = 32 + # Confirm if cpu supports AVX512 if self.use_ipex and not check_avx512(): if TORCH_VERSION_LESS_1_11: warning("Enable ipex<=1.10 in a cpu instruction set" " without avx512 will crash." "Fall back to regular pytorch.") self.use_ipex = False - elif self.enable_bf16: + elif self.dtype == torch.bfloat16: warning("Enable IPEX bfloat16 in a cpu instruction set" " without avx512 will crash. " - "Will use PyTorch Lightning Native AMP for BFloat16 precision") - self.enable_bf16 = False - kwargs['precision'] = 32 + "Using 32-bit precision") + self.dtype = None + + kwargs['precision'] = precision if self.num_processes == 1: if self.use_ipex: - strategy = create_IPEXStrategy(enable_bf16=self.enable_bf16) + strategy = create_IPEXStrategy(dtype=self.dtype) else: strategy = None # type: ignore elif strategy == "spawn": strategy = DDPSpawnStrategy(num_processes=self.num_processes, # type: ignore use_ipex=self.use_ipex, - enable_bf16=self.enable_bf16) + dtype=self.dtype) elif strategy == "subprocess": strategy = DDPSubprocessStrategy(num_processes=self.num_processes, # type: ignore use_ipex=self.use_ipex, - enable_bf16=self.enable_bf16) + dtype=self.dtype) elif strategy == "ray": strategy = create_RayStrategy(num_workers=self.num_processes, use_ipex=self.use_ipex, - enable_bf16=self.enable_bf16) + dtype=self.dtype) else: warning(f"Bigdl-nano doesn't support '{strategy}' strategy now, " f"'{strategy}' strategy of pytorch_lightning will be used. " @@ -141,11 +142,10 @@ def _setup( # which is not supported by ddp currently, # so add IPEX 1.11's optimization after `_setup_model` if self.use_ipex and not TORCH_VERSION_LESS_1_10: - dtype = torch.bfloat16 if self.enable_bf16 else None if len(optimizers) == 0: - ipex_optimize(model, inplace=True, dtype=dtype) + ipex_optimize(model, inplace=True, dtype=self.dtype) elif len(optimizers) == 1: - ipex_optimize(model, optimizer=optimizers[0], inplace=True, dtype=dtype) + ipex_optimize(model, optimizer=optimizers[0], inplace=True, dtype=self.dtype) else: invalidInputError(False, "Ipex does not support more than one optimizers.") diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index bab1adc6332..ffca917a0d4 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -62,6 +62,7 @@ def __init__(self, num_processes: int = 1, use_hpo=False, channels_last: bool = False, auto_lr: Union[int, bool] = True, + precision: Union[str, int] = 32, *args: Any, **kwargs: Any) -> None: """ A pytorch lightning trainer that uses bigdl-nano optimization. @@ -71,6 +72,9 @@ def __init__(self, num_processes: int = 1, :param cpu_for_each_process: A list of length `num_processes`, each containing a list of indices of cpus each process will be using. 
default: None, and the cpu will be automatically and evenly distributed among processes. + :param precision: Double precision (64), full precision (32), half precision (16) + or bfloat16 precision (bf16). Enable ipex bfloat16 weight prepack when `use_ipex=True` + and `precision='bf16'` """ # Check keyword arguments if "accelerator" in kwargs: @@ -103,32 +107,30 @@ def __init__(self, num_processes: int = 1, kwargs["callbacks"] = [ChannelsLastCallback()] self.use_ipex = use_ipex - enable_bf16 = self.use_ipex and kwargs.get('precision', None) == 'bf16' - - # Strategy has a higher priority than accelerator/precision/plugin, - # set precision for strategy without precision_plugin(e.g. ddp-spawn, ddp-subprocess) - # torch must be greater or equal to 1.10 to use native amp for bfloat16 precision - if TORCH_VERSION_LESS_1_10 and enable_bf16: - kwargs['precision'] = 32 + dtype = None + if self.use_ipex and precision == 'bf16': + # Enable ipex bfloat16 weight prepack and disable pytorch-lightning native AMP + dtype = torch.bfloat16 + precision = 32 + # Confirm if cpu supports avx512 if self.use_ipex and not check_avx512(): if TORCH_VERSION_LESS_1_11: - warning("Enable ipex<=1.10 in a cpu instruction set" + warning("Enable ipex<=1.11 in a cpu instruction set" " without avx512 will crash." "Fall back to regular pytorch.") self.use_ipex = False - elif enable_bf16: + elif dtype == torch.float16: warning("Enable IPEX bfloat16 in a cpu instruction set" " without avx512 will crash. " "Using 32-bit precision") - enable_bf16 = False - # IPEX-optimized model is incompatible with PL Native AMP, - # so fall back to 32-bit precision instead of staying at bfloat16 precision - kwargs['precision'] = 32 + dtype = None + + kwargs['precision'] = precision if num_processes == 1: from bigdl.nano.pytorch.strategies import create_IPEXStrategy - strategy = create_IPEXStrategy(enable_bf16=enable_bf16) if self.use_ipex else None + strategy = create_IPEXStrategy(dtype=dtype) if self.use_ipex else None kwargs["strategy"] = strategy super().__init__(*args, **kwargs) else: @@ -147,20 +149,20 @@ def __init__(self, num_processes: int = 1, strategy = DDPSpawnStrategy(num_processes=num_processes, cpu_for_each_process=cpu_for_each_process, use_ipex=self.use_ipex, - enable_bf16=enable_bf16, + dtype=dtype, auto_lr=auto_lr) elif distributed_backend == "subprocess": from bigdl.nano.pytorch.strategies import DDPSubprocessStrategy strategy = DDPSubprocessStrategy(num_processes=num_processes, cpu_for_each_process=cpu_for_each_process, use_ipex=self.use_ipex, - enable_bf16=enable_bf16, + dtype=dtype, auto_lr=auto_lr) elif distributed_backend == "ray": from bigdl.nano.pytorch.strategies import create_RayStrategy strategy = create_RayStrategy(num_workers=num_processes, use_ipex=self.use_ipex, - enable_bf16=enable_bf16, + dtype=dtype, auto_lr=auto_lr) kwargs["strategy"] = strategy super().__init__(*args, **kwargs) From 1cd0215ed5731408dd4afcdea58f360ba774977e Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Tue, 23 Aug 2022 00:46:18 +0000 Subject: [PATCH 31/32] Update --- python/nano/src/bigdl/nano/pytorch/torch_nano.py | 5 ++++- python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/torch_nano.py b/python/nano/src/bigdl/nano/pytorch/torch_nano.py index 5cbc5ab62cb..2a9721420ff 100644 --- a/python/nano/src/bigdl/nano/pytorch/torch_nano.py +++ b/python/nano/src/bigdl/nano/pytorch/torch_nano.py @@ -62,6 +62,9 @@ def __init__(self, 
num_processes: int = 1,
         self.dtype = None
         if self.use_ipex and precision == 'bf16':
             # Enable ipex bfloat16 weight prepack and disable native AMP
-            self.dtype = torch.float16
+            self.dtype = torch.bfloat16
             precision = 32

         # Confirm if cpu supports AVX512
diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py
index bc70b9d558e..85cbcecdc4f 100644
--- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py
+++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py
@@ -120,7 +120,7 @@ def __init__(self, num_processes: int = 1,
                         " without avx512 will crash."
                         "Fall back to regular pytorch.")
                 self.use_ipex = False
-            elif dtype == torch.float16:
+            elif dtype == torch.bfloat16:
                 warning("Enable IPEX bfloat16 in a cpu instruction set"
                         " without avx512 will crash. "
                         "Using 32-bit precision")
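
End-to-end usage sketch for the API as it stands after PATCH 32/32. This is illustrative only and not part of the patch series: it assumes the usual `from bigdl.nano.pytorch import Trainer` entry point, an installed intel_extension_for_pytorch, and a CPU with AVX-512 support; the small random dataset below is a placeholder standing in for a real DataLoader.

    import torch
    from torch import nn
    from torch.utils.data import DataLoader, TensorDataset
    from torchvision.models.resnet import resnet18
    from bigdl.nano.pytorch import Trainer

    # placeholder data: 16 random 32x32 RGB images with 10 classes
    train_loader = DataLoader(
        TensorDataset(torch.randn(16, 3, 32, 32), torch.randint(0, 10, (16,))),
        batch_size=4)

    model = resnet18(num_classes=10)
    loss = nn.CrossEntropyLoss()
    # SGD appears in IPEX_FUSED_OPTIMIZER_LIST, so IPEXBF16Precision can take the fused step path
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

    # use_ipex=True together with precision='bf16' maps to
    # ipex.optimize(..., dtype=torch.bfloat16) plus the IPEXBF16Precision plugin from this series;
    # on CPUs without AVX-512 the Trainer logs a warning and falls back to 32-bit precision.
    trainer = Trainer(max_epochs=1, use_ipex=True, precision='bf16')
    pl_model = Trainer.compile(model, loss, optimizer)
    trainer.fit(pl_model, train_loader)

A TorchNano subclass takes the same keywords, e.g. MyNano(use_ipex=True, precision='bf16', num_processes=2, strategy="subprocess").train(), mirroring the cases added in test_torch_nano_ipex.py.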