From 1352f87e4c707a43b563f76b67118ca7bd4a6f06 Mon Sep 17 00:00:00 2001
From: Hu Mingzhi
Date: Mon, 8 Aug 2022 00:18:50 +0000
Subject: [PATCH 01/32] Update

---
 .../pytorch/strategies/ipex/ipex_strategy.py | 38 +++++++++++++++++
 .../test/pytorch/tests/test_trainer_ipex.py  | 41 +++++++++++++++++++
 2 files changed, 79 insertions(+)

diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py
index 26d23855c44..ec0f80dee03 100644
--- a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py
+++ b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py
@@ -14,13 +14,22 @@
 # limitations under the License.
 #
 
+from contextlib import contextmanager
+from functools import partial
+from typing import Any, Union, Callable
+
 import torch
+from torch.nn import Module
+from torch.optim import Optimizer
+
 import pytorch_lightning as pl
 from pytorch_lightning.strategies import SingleDeviceStrategy
 from pytorch_lightning.accelerators.accelerator import Accelerator
 from pytorch_lightning.plugins.precision import PrecisionPlugin
+
 from bigdl.nano.utils.log4Error import invalidInputError
 import intel_extension_for_pytorch as ipex
+from intel_extension_for_pytorch.optim._optimizer_utils import IPEX_FUSED_OPTIMIZER_LIST
 
 from .ipex_accelerator import IPEXAccelerator
 
@@ -44,6 +53,9 @@ def __init__(
         """
         self.enable_bf16 = enable_bf16
 
+        if enable_bf16 and isinstance(precision_plugin, PrecisionPlugin):
+            precision_plugin = IPEXBF16Precision()
+
         super().__init__(accelerator=accelerator, precision_plugin=precision_plugin)
 
     def setup(self, trainer: pl.Trainer) -> None:
@@ -63,3 +75,29 @@ def setup(self, trainer: pl.Trainer) -> None:
             ipex.optimize(self.model, optimizer=self.optimizers[0], inplace=True, dtype=dtype)
         else:
             invalidInputError(False, "Ipex does not support more than one optimizers.")
+
+
+class IPEXBF16Precision(PrecisionPlugin):
+    """Create Precision Plugin for IPEX BFloat16."""
+
+    @contextmanager
+    def forward_context(self):
+        """PyTorch AMP for managing model forward/training_step/evaluation_step/predict_step."""
+        with torch.cpu.amp.autocast():
+            yield
+
+    def optimizer_step(self,
+                       model: Union["pl.LightningModule", Module],
+                       optimizer: Optimizer,
+                       optimizer_idx: int,
+                       closure: Callable[[], Any],
+                       **kwargs: Any) -> Any:
+        """Hook to run the optimizer step."""
+        if isinstance(model, pl.LightningModule):
+            closure = partial(self._wrap_closure, model, optimizer, optimizer_idx, closure)
+
+        # Automatically call closure for optimizer not supported by IPEX
+        if type(optimizer) not in IPEX_FUSED_OPTIMIZER_LIST:
+            closure()
+
+        return optimizer.step(closure, **kwargs)
diff --git a/python/nano/test/pytorch/tests/test_trainer_ipex.py b/python/nano/test/pytorch/tests/test_trainer_ipex.py
index 16847bcc35a..b5f24be9916 100644
--- a/python/nano/test/pytorch/tests/test_trainer_ipex.py
+++ b/python/nano/test/pytorch/tests/test_trainer_ipex.py
@@ -66,6 +66,47 @@ def test_trainer_save_checkpoint(self):
         pl_model = Trainer.compile(self.model, self.loss, self.optimizer, self.scheduler_dict)
         trainer.fit(pl_model, self.train_loader)
 
+    def test_trainer_ipex_bf16(self):
+        trainer = Trainer(max_epochs=max_epochs, use_ipex=True, enable_bf16=True)
+
+        # use_ipex=True will perform inplace optimization
+        model = ResNet18(10, pretrained=False, include_top=False, freeze=True)
+        optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
+        loss = nn.CrossEntropyLoss()
+        scheduler_dict = {
+            "scheduler": OneCycleLR(
+                optimizer,
+                0.1,
+                epochs=max_epochs,
+                steps_per_epoch=len(self.train_loader),
+            ),
+            "interval": "step",
+        }
+
+        pl_model = Trainer.compile(model, loss, optimizer, scheduler_dict)
+        trainer.fit(pl_model, self.train_loader)
+        trainer.test(pl_model, self.train_loader)
+
+    def test_trainer_ipex_bf16_unspport_optim(self):
+        trainer = Trainer(max_epochs=max_epochs, use_ipex=True, enable_bf16=True)
+
+        model = ResNet18(10, pretrained=False, include_top=False, freeze=True)
+        optimizer = torch.optim.AdamW(model.parameters(), lr=0.01, weight_decay=5e-4)
+        loss = nn.CrossEntropyLoss()
+        scheduler_dict = {
+            "scheduler": OneCycleLR(
+                optimizer,
+                0.1,
+                epochs=max_epochs,
+                steps_per_epoch=len(self.train_loader),
+            ),
+            "interval": "step",
+        }
+
+        pl_model = Trainer.compile(model, loss, optimizer, scheduler_dict)
+        trainer.fit(pl_model, self.train_loader)
+        trainer.test(pl_model, self.train_loader)
+
 
 if __name__ == '__main__':
     pytest.main([__file__])

From 478b1233455fe71d6cf6cb0b4eb2cd0946e7f911 Mon Sep 17 00:00:00 2001
From: Hu Mingzhi
Date: Mon, 8 Aug 2022 02:42:01 +0000
Subject: [PATCH 02/32] Fix code style

---
 .../src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py | 2 +-
 python/nano/test/pytorch/utils/_train_torch_lightning.py    | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py
index ec0f80dee03..e22c5bc6990 100644
--- a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py
+++ b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py
@@ -82,7 +82,7 @@ class IPEXBF16Precision(PrecisionPlugin):
 
     @contextmanager
     def forward_context(self):
-        """PyTorch AMP for managing model forward/training_step/evaluation_step/predict_step."""
+        """AMP for managing model forward/training_step/evaluation_step/predict_step."""
         with torch.cpu.amp.autocast():
             yield
 
diff --git a/python/nano/test/pytorch/utils/_train_torch_lightning.py b/python/nano/test/pytorch/utils/_train_torch_lightning.py
index 36b4a2908e6..a2ab126e6c8 100644
--- a/python/nano/test/pytorch/utils/_train_torch_lightning.py
+++ b/python/nano/test/pytorch/utils/_train_torch_lightning.py
@@ -122,5 +122,7 @@ def train_torch_lightning(model, batch_size, num_workers, data_dir, use_ipex=Fal
         else:
             # Frozen parameters should not change
             if not torch.all(torch.eq(para1, para2)):
-                raise Exception(name + " freeze failed.")
+                raise Exception(name + " freeze failed.\n"
+                                + para1 + "\n"
+                                + para2 + "\n")
     print("pass")

From f91c4d655c98a4fcd8dd592f8827ed9765262bbb Mon Sep 17 00:00:00 2001
From: Hu Mingzhi
Date: Mon, 8 Aug 2022 02:56:40 +0000
Subject: [PATCH 03/32] re-run action

---
 python/nano/test/pytorch/utils/_train_torch_lightning.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/python/nano/test/pytorch/utils/_train_torch_lightning.py b/python/nano/test/pytorch/utils/_train_torch_lightning.py
index a2ab126e6c8..859cc95c84d 100644
--- a/python/nano/test/pytorch/utils/_train_torch_lightning.py
+++ b/python/nano/test/pytorch/utils/_train_torch_lightning.py
@@ -122,7 +122,5 @@ def train_torch_lightning(model, batch_size, num_workers, data_dir, use_ipex=Fal
         else:
             # Frozen parameters should not change
             if not torch.all(torch.eq(para1, para2)):
-                raise Exception(name + " freeze failed.\n"
-                                + para1 + "\n"
-                                + para2 + "\n")
+                raise Exception(f"{name} freeze failed. \n {para1} \n {para2}")
     print("pass")

From f5af29efc38d9ad1f73ca0fb0feae8f368060537 Mon Sep 17 00:00:00 2001
From: Hu Mingzhi
Date: Mon, 8 Aug 2022 04:00:55 +0000
Subject: [PATCH 04/32] Fix code style

---
 python/nano/test/pytorch/tests/test_trainer_ipex.py      | 11 +++++++++++
 .../nano/test/pytorch/utils/_train_torch_lightning.py    |  2 +-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/python/nano/test/pytorch/tests/test_trainer_ipex.py b/python/nano/test/pytorch/tests/test_trainer_ipex.py
index b5f24be9916..2f8150242da 100644
--- a/python/nano/test/pytorch/tests/test_trainer_ipex.py
+++ b/python/nano/test/pytorch/tests/test_trainer_ipex.py
@@ -26,6 +26,7 @@
 
 from bigdl.nano.pytorch import Trainer
 from bigdl.nano.pytorch.vision.models import vision
+from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10
 
 batch_size = 256
 max_epochs = 2
@@ -86,6 +87,11 @@ def test_trainer_ipex_bf16(self):
         pl_model = Trainer.compile(model, loss, optimizer, scheduler_dict)
         trainer.fit(pl_model, self.train_loader)
         trainer.test(pl_model, self.train_loader)
+
+        if TORCH_VERSION_LESS_1_10:
+            import intel_pytorch_extension as ipex
+            # Avoid affecting other tests
+            ipex.enable_auto_mixed_precision(None)
 
     def test_trainer_ipex_bf16_unspport_optim(self):
         trainer = Trainer(max_epochs=max_epochs, use_ipex=True, enable_bf16=True)
@@ -107,6 +113,11 @@ def test_trainer_ipex_bf16_unspport_optim(self):
         trainer.fit(pl_model, self.train_loader)
         trainer.test(pl_model, self.train_loader)
 
+        if TORCH_VERSION_LESS_1_10:
+            import intel_pytorch_extension as ipex
+            # Avoid affecting other tests
+            ipex.enable_auto_mixed_precision(None)
+
 
 if __name__ == '__main__':
     pytest.main([__file__])
diff --git a/python/nano/test/pytorch/utils/_train_torch_lightning.py b/python/nano/test/pytorch/utils/_train_torch_lightning.py
index 859cc95c84d..36b4a2908e6 100644
--- a/python/nano/test/pytorch/utils/_train_torch_lightning.py
+++ b/python/nano/test/pytorch/utils/_train_torch_lightning.py
@@ -122,5 +122,5 @@ def train_torch_lightning(model, batch_size, num_workers, data_dir, use_ipex=Fal
         else:
            # Frozen parameters should not change
             if not torch.all(torch.eq(para1, para2)):
-                raise Exception(f"{name} freeze failed. 
\n {para1} \n {para2}") + raise Exception(name + " freeze failed.") print("pass") From d22b0948c4262db285dffa755486cb79d67c0b58 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Tue, 9 Aug 2022 01:48:27 +0000 Subject: [PATCH 05/32] re-run action --- .../src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py index e22c5bc6990..6bea7380552 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py @@ -83,7 +83,8 @@ class IPEXBF16Precision(PrecisionPlugin): @contextmanager def forward_context(self): """AMP for managing model forward/training_step/evaluation_step/predict_step.""" - with torch.cpu.amp.autocast(): + # Manually set the dtype + with torch.cpu.amp.autocast(dtype=torch.bfloat16): yield def optimizer_step(self, From b35b2745382863ab3c9f60ff0eec75a287ee45d1 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Wed, 10 Aug 2022 00:56:19 +0000 Subject: [PATCH 06/32] Update --- .../nano/pytorch/strategies/ipex/ipex_strategy.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py index 6bea7380552..51137188d3c 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py @@ -94,11 +94,14 @@ def optimizer_step(self, closure: Callable[[], Any], **kwargs: Any) -> Any: """Hook to run the optimizer step.""" + if type(optimizer) in IPEX_FUSED_OPTIMIZER_LIST: + return super().optimizer_step(model, optimizer, optimizer_idx, closure, **kwargs) + if isinstance(model, pl.LightningModule): closure = partial(self._wrap_closure, model, optimizer, optimizer_idx, closure) + + closure_result = closure() + optimizer.step(closure=None, **kwargs) + + return closure_result - # Automatically call closure for optimizer not supported by IPEX - if type(optimizer) not in IPEX_FUSED_OPTIMIZER_LIST: - closure() - - return optimizer.step(closure, **kwargs) From 5052561e85dc1c33d89f666922e15e78a4faf579 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Wed, 10 Aug 2022 01:06:52 +0000 Subject: [PATCH 07/32] Fix code style --- .../bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py index 51137188d3c..51c4591da76 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py @@ -96,12 +96,11 @@ def optimizer_step(self, """Hook to run the optimizer step.""" if type(optimizer) in IPEX_FUSED_OPTIMIZER_LIST: return super().optimizer_step(model, optimizer, optimizer_idx, closure, **kwargs) - + if isinstance(model, pl.LightningModule): closure = partial(self._wrap_closure, model, optimizer, optimizer_idx, closure) - + closure_result = closure() optimizer.step(closure=None, **kwargs) - - return closure_result + return closure_result From 5df5f0781488fa34d3036ff5632df0bdd8e9808a Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Wed, 10 Aug 2022 05:43:13 +0000 Subject: [PATCH 08/32] support bf16 
multi training --- .../src/bigdl/nano/pytorch/strategies/ddp_spawn.py | 5 +++++ python/nano/test/pytorch/tests/test_plugin_ipex.py | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py b/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py index 84e6d29f1d2..413fae8658b 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py @@ -61,6 +61,7 @@ ipex_optimize, create_IPEXAccelerator, to_cpu from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 from bigdl.nano.utils.log4Error import invalidInputError +from bigdl.nano.pytorch.strategies.ipex.ipex_strategy import IPEXBF16Precision import logging import warnings @@ -181,6 +182,10 @@ def __init__( super().__init__(accelerator=create_IPEXAccelerator(), parallel_devices=parallel_devices, cluster_environment=cluster_environment, **kwargs) + elif use_ipex and enable_bf16 and 'precision_plugin' not in kwargs: + super().__init__(parallel_devices=parallel_devices, + cluster_environment=cluster_environment, + precision_plugin=IPEXBF16Precision(), **kwargs) else: super().__init__(parallel_devices=parallel_devices, cluster_environment=cluster_environment, **kwargs) diff --git a/python/nano/test/pytorch/tests/test_plugin_ipex.py b/python/nano/test/pytorch/tests/test_plugin_ipex.py index 609e1e449ad..ec73288faa5 100644 --- a/python/nano/test/pytorch/tests/test_plugin_ipex.py +++ b/python/nano/test/pytorch/tests/test_plugin_ipex.py @@ -64,6 +64,16 @@ def test_trainer_subprocess_plugin(self): trainer.fit(pl_model, self.data_loader, self.test_data_loader) trainer.test(pl_model, self.test_data_loader) + def test_trainer_subprocess_plugin_bf16(self): + pl_model = LightningModule( + self.model, self.loss, self.optimizer, + metrics=[torchmetrics.F1(num_classes), torchmetrics.Accuracy(num_classes=10)] + ) + trainer = Trainer(num_processes=2, distributed_backend="subprocess", + max_epochs=4, use_ipex=True, enable_bf16=True, + callbacks=[CheckIPEXCallback()]) + trainer.fit(pl_model, self.data_loader, self.test_data_loader) + trainer.test(pl_model, self.test_data_loader) if __name__ == '__main__': pytest.main([__file__]) From 3748082f976f62141d084c1fc9447d85814cb62b Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Wed, 10 Aug 2022 05:56:07 +0000 Subject: [PATCH 09/32] Update --- python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py | 2 +- python/nano/test/pytorch/tests/test_plugin_ipex.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py b/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py index 413fae8658b..94e10ff30a3 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py @@ -61,7 +61,6 @@ ipex_optimize, create_IPEXAccelerator, to_cpu from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 from bigdl.nano.utils.log4Error import invalidInputError -from bigdl.nano.pytorch.strategies.ipex.ipex_strategy import IPEXBF16Precision import logging import warnings @@ -183,6 +182,7 @@ def __init__( parallel_devices=parallel_devices, cluster_environment=cluster_environment, **kwargs) elif use_ipex and enable_bf16 and 'precision_plugin' not in kwargs: + from bigdl.nano.pytorch.strategies.ipex.ipex_strategy import IPEXBF16Precision super().__init__(parallel_devices=parallel_devices, cluster_environment=cluster_environment, 
precision_plugin=IPEXBF16Precision(), **kwargs) diff --git a/python/nano/test/pytorch/tests/test_plugin_ipex.py b/python/nano/test/pytorch/tests/test_plugin_ipex.py index ec73288faa5..3c2fdd82732 100644 --- a/python/nano/test/pytorch/tests/test_plugin_ipex.py +++ b/python/nano/test/pytorch/tests/test_plugin_ipex.py @@ -75,5 +75,6 @@ def test_trainer_subprocess_plugin_bf16(self): trainer.fit(pl_model, self.data_loader, self.test_data_loader) trainer.test(pl_model, self.test_data_loader) + if __name__ == '__main__': pytest.main([__file__]) From 4073f20e78b0dd7604c24d7a9668d2c0aa2b3c8f Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Thu, 11 Aug 2022 14:53:19 +0000 Subject: [PATCH 10/32] Update --- .../pytorch/strategies/ipex/ipex_strategy.py | 26 +++++++++++++++---- .../src/bigdl/nano/pytorch/trainer/Trainer.py | 7 ++++- .../test/pytorch/tests/test_plugin_ipex.py | 11 +++++--- .../nano/test/pytorch/tests/test_trainer.py | 20 ++++++++++++-- .../test/pytorch/tests/test_trainer_ipex.py | 12 ++++++--- .../pytorch/utils/_train_ipex_callback.py | 14 ++++++++++ 6 files changed, 75 insertions(+), 15 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py index 51c4591da76..4e583d09b50 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py @@ -16,16 +16,18 @@ from contextlib import contextmanager from functools import partial +from logging import warning from typing import Any, Union, Callable import torch from torch.nn import Module -from torch.optim import Optimizer +from torch.optim import Optimizer, LBFGS +from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_12 import pytorch_lightning as pl from pytorch_lightning.strategies import SingleDeviceStrategy from pytorch_lightning.accelerators.accelerator import Accelerator -from pytorch_lightning.plugins.precision import PrecisionPlugin +from pytorch_lightning.plugins.precision import PrecisionPlugin, NativeMixedPrecisionPlugin from bigdl.nano.utils.log4Error import invalidInputError import intel_extension_for_pytorch as ipex @@ -55,7 +57,6 @@ def __init__( if enable_bf16 and isinstance(precision_plugin, PrecisionPlugin): precision_plugin = IPEXBF16Precision() - super().__init__(accelerator=accelerator, precision_plugin=precision_plugin) def setup(self, trainer: pl.Trainer) -> None: @@ -83,7 +84,10 @@ class IPEXBF16Precision(PrecisionPlugin): @contextmanager def forward_context(self): """AMP for managing model forward/training_step/evaluation_step/predict_step.""" - # Manually set the dtype + # Using IPEX bf16 and torch.autocast(...) will raise a segmentation fault + # in PyTorch 1.11. + # torch.autocast("cpu", args...) is equivalent to torch.cpu.amp.autocast(args...) + # in PyTorch 1.12. with torch.cpu.amp.autocast(dtype=torch.bfloat16): yield @@ -100,7 +104,19 @@ def optimizer_step(self, if isinstance(model, pl.LightningModule): closure = partial(self._wrap_closure, model, optimizer, optimizer_idx, closure) + # Only `torch.optim.LBFGS` need to reevaluate closure multiple times + # in optimizer.step(...) now. + if isinstance(optimizer, LBFGS): + invalidInputError(False, + "IPEX BFloat16 and the LBFGS optimizer are not compatible " + f"(optimizer {optimizer_idx}") + + # Detect custom optimzer + if type(optimizer).__name__ not in dir(torch.optim): + warning("Closure use in optimizer.step(...) 
is not currently supported" + " if IPEX and BFloat16 are enabled.") + closure_result = closure() - optimizer.step(closure=None, **kwargs) + optimizer.step(**kwargs) return closure_result diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index de7ba402026..ee6d3fd81a8 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -57,7 +57,6 @@ class Trainer(pl.Trainer): def __init__(self, num_processes: int = 1, use_ipex: bool = False, - enable_bf16=False, distributed_backend="subprocess", cpu_for_each_process: Optional[List[List[int]]] = None, use_hpo=False, @@ -111,6 +110,12 @@ def __init__(self, num_processes: int = 1, self.use_ipex = use_ipex + enable_bf16 = False + + if self.use_ipex and kwargs.get('precision', None) == "bf16": + # No need to set precision to 32, because Strategy > Accelerator/precision/plugins + enable_bf16 = True + if num_processes == 1: from bigdl.nano.pytorch.strategies import create_IPEXStrategy strategy = create_IPEXStrategy(enable_bf16=enable_bf16) if self.use_ipex else None diff --git a/python/nano/test/pytorch/tests/test_plugin_ipex.py b/python/nano/test/pytorch/tests/test_plugin_ipex.py index 3c2fdd82732..a1826c15d00 100644 --- a/python/nano/test/pytorch/tests/test_plugin_ipex.py +++ b/python/nano/test/pytorch/tests/test_plugin_ipex.py @@ -27,7 +27,7 @@ from test.pytorch.utils._train_torch_lightning import create_data_loader, data_transform from test.pytorch.utils._train_torch_lightning import create_test_data_loader -from test.pytorch.utils._train_ipex_callback import CheckIPEXCallback +from test.pytorch.utils._train_ipex_callback import CheckIPEXCallback, CheckIPEXFusedStepCallback from test.pytorch.tests.test_lightning import ResNet18 num_classes = 10 @@ -65,13 +65,16 @@ def test_trainer_subprocess_plugin(self): trainer.test(pl_model, self.test_data_loader) def test_trainer_subprocess_plugin_bf16(self): + model = ResNet18(pretrained=False, include_top=False, freeze=True) + loss = nn.CrossEntropyLoss() + optimizer = torch.optim.Adam(model.parameters(), lr=0.01) pl_model = LightningModule( - self.model, self.loss, self.optimizer, + model, loss, optimizer, metrics=[torchmetrics.F1(num_classes), torchmetrics.Accuracy(num_classes=10)] ) trainer = Trainer(num_processes=2, distributed_backend="subprocess", - max_epochs=4, use_ipex=True, enable_bf16=True, - callbacks=[CheckIPEXCallback()]) + max_epochs=4, use_ipex=True, precision="bf16", + callbacks=[CheckIPEXCallback(), CheckIPEXFusedStepCallback()]) trainer.fit(pl_model, self.data_loader, self.test_data_loader) trainer.test(pl_model, self.test_data_loader) diff --git a/python/nano/test/pytorch/tests/test_trainer.py b/python/nano/test/pytorch/tests/test_trainer.py index 614c44ef348..824714cbb3c 100644 --- a/python/nano/test/pytorch/tests/test_trainer.py +++ b/python/nano/test/pytorch/tests/test_trainer.py @@ -22,11 +22,13 @@ import pytest import torch +from torch.utils.data import DataLoader, TensorDataset from pytorch_lightning import LightningModule +from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin +from pytorch_lightning.plugins.precision.double import DoublePrecisionPlugin from test.pytorch.utils._train_torch_lightning import create_data_loader, data_transform from test.pytorch.utils._train_torch_lightning import train_with_linear_top_layer from torch import nn -import torchmetrics from bigdl.nano.pytorch import Trainer from 
bigdl.nano.pytorch.vision.models import vision @@ -78,6 +80,20 @@ def test_trainer_compile(self): pl_model = Trainer.compile(self.model, self.loss, self.optimizer) trainer.fit(pl_model, self.train_loader) + def test_trainer_precision_bf16(self): + model = ResNet18(10, pretrained=False, include_top=False, freeze=True) + loss = nn.CrossEntropyLoss() + optimizer = torch.optim.Adam(model.parameters(), lr=0.01) + trainer = Trainer(max_epochs=1, precision='bf16') + pl_model = Trainer.compile(model, loss, optimizer) + trainer.fit(pl_model, self.train_loader) + assert isinstance(trainer.strategy.precision_plugin, NativeMixedPrecisionPlugin) + # model is not converted to bfloat16 precision + input = TensorDataset(torch.rand(1, 3, 32, 32)) + train_loader = DataLoader(input) + y_hat = trainer.predict(pl_model, train_loader) + assert y_hat[0].dtype is torch.bfloat16 + def test_trainer_save_load(self): trainer = Trainer(max_epochs=1) pl_model = Trainer.compile(self.model, self.loss, self.optimizer) @@ -96,7 +112,7 @@ def test_trainer_save_load(self): for k in original_state_dict.keys(): assert (original_state_dict[k] == loaded_state_dict[k]).all() shutil.rmtree('saved_model') - + if __name__ == '__main__': pytest.main([__file__]) diff --git a/python/nano/test/pytorch/tests/test_trainer_ipex.py b/python/nano/test/pytorch/tests/test_trainer_ipex.py index 2f8150242da..2731ddedceb 100644 --- a/python/nano/test/pytorch/tests/test_trainer_ipex.py +++ b/python/nano/test/pytorch/tests/test_trainer_ipex.py @@ -15,6 +15,7 @@ # +from gc import callbacks import os from unittest import TestCase @@ -22,6 +23,7 @@ import torch from torch.optim.lr_scheduler import OneCycleLR from test.pytorch.utils._train_torch_lightning import create_data_loader, data_transform +from test.pytorch.utils._train_ipex_callback import CheckIPEXFusedStepCallback from torch import nn from bigdl.nano.pytorch import Trainer @@ -68,7 +70,8 @@ def test_trainer_save_checkpoint(self): trainer.fit(pl_model, self.train_loader) def test_trainer_ipex_bf16(self): - trainer = Trainer(max_epochs=max_epochs, use_ipex=True, enable_bf16=True) + trainer = Trainer(max_epochs=max_epochs, use_ipex=True, precision="bf16", + callbacks=[CheckIPEXFusedStepCallback()]) # use_ipex=True will perform inplace optimization model = ResNet18(10, pretrained=False, include_top=False, freeze=True) @@ -87,14 +90,16 @@ def test_trainer_ipex_bf16(self): pl_model = Trainer.compile(model, loss, optimizer, scheduler_dict) trainer.fit(pl_model, self.train_loader) trainer.test(pl_model, self.train_loader) - + if TORCH_VERSION_LESS_1_10: import intel_pytorch_extension as ipex + # Diable IPEX AMP # Avoid affecting other tests ipex.enable_auto_mixed_precision(None) def test_trainer_ipex_bf16_unspport_optim(self): - trainer = Trainer(max_epochs=max_epochs, use_ipex=True, enable_bf16=True) + trainer = Trainer(max_epochs=max_epochs, use_ipex=True, precision="bf16", + callbacks=[CheckIPEXFusedStepCallback()]) model = ResNet18(10, pretrained=False, include_top=False, freeze=True) optimizer = torch.optim.AdamW(model.parameters(), lr=0.01, weight_decay=5e-4) @@ -115,6 +120,7 @@ def test_trainer_ipex_bf16_unspport_optim(self): if TORCH_VERSION_LESS_1_10: import intel_pytorch_extension as ipex + # Diable IPEX AMP # Avoid affecting other tests ipex.enable_auto_mixed_precision(None) diff --git a/python/nano/test/pytorch/utils/_train_ipex_callback.py b/python/nano/test/pytorch/utils/_train_ipex_callback.py index 2dd36b59c07..3379c5373ac 100644 --- 
a/python/nano/test/pytorch/utils/_train_ipex_callback.py +++ b/python/nano/test/pytorch/utils/_train_ipex_callback.py @@ -17,6 +17,7 @@ import torch import warnings from typing import Dict +import pytorch_lightning as pl from pytorch_lightning.callbacks import Callback from pytorch_lightning.plugins.training_type import SingleDevicePlugin, DDPSpawnPlugin from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 @@ -68,3 +69,16 @@ def check_ipex_layers(m): return False assert check_ipex_layers(pl_module) + +class CheckIPEXFusedStepCallback(Callback): + def on_train_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule"): + if not TORCH_VERSION_LESS_1_10: + from intel_extension_for_pytorch.optim._optimizer_utils import IPEX_FUSED_OPTIMIZER_LIST + # IPEX only support one optimizer + opt = trainer.optimizers[0] + if type(opt) in IPEX_FUSED_OPTIMIZER_LIST: + assert opt.fused # type: ignore + else: + # Check non-fused step + assert hasattr(opt, '_original_step') + assert getattr(opt, 'step') is not getattr(type(opt), 'step') From ee8ce0f23fd9502148e88281a7fe336bb0ccf526 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Fri, 12 Aug 2022 00:26:30 +0000 Subject: [PATCH 11/32] Update --- .../pytorch/strategies/ipex/ipex_strategy.py | 2 +- .../src/bigdl/nano/pytorch/trainer/Trainer.py | 3 ++- .../nano/test/pytorch/tests/test_trainer.py | 23 ++++++++++++------- .../test/pytorch/tests/test_trainer_ipex.py | 1 - .../pytorch/utils/_train_ipex_callback.py | 20 +++++++++------- 5 files changed, 30 insertions(+), 19 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py index 4e583d09b50..3ce2b32d952 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py @@ -84,7 +84,7 @@ class IPEXBF16Precision(PrecisionPlugin): @contextmanager def forward_context(self): """AMP for managing model forward/training_step/evaluation_step/predict_step.""" - # Using IPEX bf16 and torch.autocast(...) will raise a segmentation fault + # Using IPEX bf16 and torch.autocast(...) reports a segmentation fault # in PyTorch 1.11. # torch.autocast("cpu", args...) is equivalent to torch.cpu.amp.autocast(args...) # in PyTorch 1.12. 
diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index ee6d3fd81a8..dbf90578c98 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -113,8 +113,9 @@ def __init__(self, num_processes: int = 1, enable_bf16 = False if self.use_ipex and kwargs.get('precision', None) == "bf16": - # No need to set precision to 32, because Strategy > Accelerator/precision/plugins enable_bf16 = True + if TORCH_VERSION_LESS_1_10: + kwargs['precision'] = 32 if num_processes == 1: from bigdl.nano.pytorch.strategies import create_IPEXStrategy diff --git a/python/nano/test/pytorch/tests/test_trainer.py b/python/nano/test/pytorch/tests/test_trainer.py index 824714cbb3c..ceb0e513c4d 100644 --- a/python/nano/test/pytorch/tests/test_trainer.py +++ b/python/nano/test/pytorch/tests/test_trainer.py @@ -32,6 +32,7 @@ from bigdl.nano.pytorch import Trainer from bigdl.nano.pytorch.vision.models import vision +from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 batch_size = 256 num_workers = 0 @@ -84,15 +85,21 @@ def test_trainer_precision_bf16(self): model = ResNet18(10, pretrained=False, include_top=False, freeze=True) loss = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=0.01) - trainer = Trainer(max_epochs=1, precision='bf16') pl_model = Trainer.compile(model, loss, optimizer) - trainer.fit(pl_model, self.train_loader) - assert isinstance(trainer.strategy.precision_plugin, NativeMixedPrecisionPlugin) - # model is not converted to bfloat16 precision - input = TensorDataset(torch.rand(1, 3, 32, 32)) - train_loader = DataLoader(input) - y_hat = trainer.predict(pl_model, train_loader) - assert y_hat[0].dtype is torch.bfloat16 + if TORCH_VERSION_LESS_1_10: + trainer = Trainer(max_epochs=1, precision='bf16') + trainer.fit(pl_model, self.train_loader) + assert isinstance(trainer.strategy.precision_plugin, NativeMixedPrecisionPlugin) + # model is not converted to bfloat16 precision + input = TensorDataset(torch.rand(1, 3, 32, 32)) + train_loader = DataLoader(input) + y_hat = trainer.predict(pl_model, train_loader) + assert y_hat[0].dtype is torch.bfloat16 + else: + trainer = Trainer(max_epochs=1, precision=64) + trainer.fit(pl_model, self.train_loader) + assert isinstance(trainer.strategy.precision_plugin, DoublePrecisionPlugin) + assert optimizer.param_groups[0]['params'][0].dtype is torch.float64 def test_trainer_save_load(self): trainer = Trainer(max_epochs=1) diff --git a/python/nano/test/pytorch/tests/test_trainer_ipex.py b/python/nano/test/pytorch/tests/test_trainer_ipex.py index 2731ddedceb..897397a08cc 100644 --- a/python/nano/test/pytorch/tests/test_trainer_ipex.py +++ b/python/nano/test/pytorch/tests/test_trainer_ipex.py @@ -15,7 +15,6 @@ # -from gc import callbacks import os from unittest import TestCase diff --git a/python/nano/test/pytorch/utils/_train_ipex_callback.py b/python/nano/test/pytorch/utils/_train_ipex_callback.py index 3379c5373ac..684289c8606 100644 --- a/python/nano/test/pytorch/utils/_train_ipex_callback.py +++ b/python/nano/test/pytorch/utils/_train_ipex_callback.py @@ -26,11 +26,12 @@ class CheckIPEXCallback(Callback): def on_train_start(self, trainer, pl_module): - if trainer.use_ipex == False: - warnings.warn("CheckIPEXCallback is used, but ipex is disabled. ") - return + if not trainer.use_ipex: + warnings.warn("CheckIPEXCallback is used, but ipex is disabled. 
") + return if TORCH_VERSION_LESS_1_10: from bigdl.nano.deps.ipex.version_1_9.ipex_torchfunctional import RESTORE_TYPE + def check_device(obj): if torch.is_tensor(obj): if obj.device.type == 'xpu': @@ -46,15 +47,18 @@ def check_device(obj): assert check_device(pl_module.state_dict()) else: from intel_extension_for_pytorch.nn.utils._model_convert import _LSTM - from intel_extension_for_pytorch.nn.utils._weight_prepack import _IPEXConvNd, _IPEXLinear, _IPEXConvTransposeNd + from intel_extension_for_pytorch.nn.utils._weight_prepack import (_IPEXConvNd, + _IPEXLinear, + _IPEXConvTransposeNd) IPEX_LAYERS = (_LSTM, _IPEXConvNd, _IPEXLinear, _IPEXConvTransposeNd) - IPEX_ATTR = ('master_weight', - 'weight_trail', - 'master_bias', - 'bias_trail') + IPEX_ATTR = ('master_weight', + 'weight_trail', + 'master_bias', + 'bias_trail') + def check_ipex_layers(m): if isinstance(m, IPEX_LAYERS): print("model is optimized by IPEX") From 8798671664904c602198df985b7f08ae4ab57dfa Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Fri, 12 Aug 2022 00:55:29 +0000 Subject: [PATCH 12/32] Update --- python/nano/test/pytorch/tests/test_trainer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/python/nano/test/pytorch/tests/test_trainer.py b/python/nano/test/pytorch/tests/test_trainer.py index ceb0e513c4d..ae8b869ff05 100644 --- a/python/nano/test/pytorch/tests/test_trainer.py +++ b/python/nano/test/pytorch/tests/test_trainer.py @@ -87,6 +87,11 @@ def test_trainer_precision_bf16(self): optimizer = torch.optim.Adam(model.parameters(), lr=0.01) pl_model = Trainer.compile(model, loss, optimizer) if TORCH_VERSION_LESS_1_10: + trainer = Trainer(max_epochs=1, precision=64) + trainer.fit(pl_model, self.train_loader) + assert isinstance(trainer.strategy.precision_plugin, DoublePrecisionPlugin) + assert optimizer.param_groups[0]['params'][0].dtype is torch.float64 + else: trainer = Trainer(max_epochs=1, precision='bf16') trainer.fit(pl_model, self.train_loader) assert isinstance(trainer.strategy.precision_plugin, NativeMixedPrecisionPlugin) @@ -95,11 +100,6 @@ def test_trainer_precision_bf16(self): train_loader = DataLoader(input) y_hat = trainer.predict(pl_model, train_loader) assert y_hat[0].dtype is torch.bfloat16 - else: - trainer = Trainer(max_epochs=1, precision=64) - trainer.fit(pl_model, self.train_loader) - assert isinstance(trainer.strategy.precision_plugin, DoublePrecisionPlugin) - assert optimizer.param_groups[0]['params'][0].dtype is torch.float64 def test_trainer_save_load(self): trainer = Trainer(max_epochs=1) From 152da313c89ff3a7ac8ed49671c369ce5b519142 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 00:10:00 +0000 Subject: [PATCH 13/32] Update --- .../nano/pytorch/strategies/ipex/ipex_strategy.py | 12 +++++++----- .../nano/src/bigdl/nano/pytorch/trainer/Trainer.py | 3 +++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py index 3ce2b32d952..28166f72e8f 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py @@ -88,7 +88,7 @@ def forward_context(self): # in PyTorch 1.11. # torch.autocast("cpu", args...) is equivalent to torch.cpu.amp.autocast(args...) # in PyTorch 1.12. 
- with torch.cpu.amp.autocast(dtype=torch.bfloat16): + with torch.cpu.amp.autocast(): yield def optimizer_step(self, @@ -109,13 +109,15 @@ def optimizer_step(self, if isinstance(optimizer, LBFGS): invalidInputError(False, "IPEX BFloat16 and the LBFGS optimizer are not compatible " - f"(optimizer {optimizer_idx}") + f"(optimizer {optimizer_idx}", + "Hint: Set 'use_ipex' to False or not set 'precision' to 'bf16'" + " if LBFGS optimizer is necessary") # Detect custom optimzer if type(optimizer).__name__ not in dir(torch.optim): - warning("Closure use in optimizer.step(...) is not currently supported" - " if IPEX and BFloat16 are enabled.") - + warning("Seems like you are using a custom optimizer," + "please make sure that 'optimizer.step(closure)'" + " does not need to be called in training stage") closure_result = closure() optimizer.step(**kwargs) diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index dbf90578c98..9f70187267f 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -112,8 +112,11 @@ def __init__(self, num_processes: int = 1, enable_bf16 = False + # enable precision plugin for IPEX BF16 if self.use_ipex and kwargs.get('precision', None) == "bf16": enable_bf16 = True + # No need to set `precision` because strategy has higher priority + # than accelerator/plugin if TORCH_VERSION_LESS_1_10: kwargs['precision'] = 32 From 8468c52cb124f9adf91f4890e759426a7248998d Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 04:44:35 +0000 Subject: [PATCH 14/32] Update --- .../nano/test/pytorch/tests/test_trainer.py | 23 ------- .../pytorch/tests/test_trainer_precision.py | 67 +++++++++++++++++++ 2 files changed, 67 insertions(+), 23 deletions(-) create mode 100644 python/nano/test/pytorch/tests/test_trainer_precision.py diff --git a/python/nano/test/pytorch/tests/test_trainer.py b/python/nano/test/pytorch/tests/test_trainer.py index ae8b869ff05..d66d6faceea 100644 --- a/python/nano/test/pytorch/tests/test_trainer.py +++ b/python/nano/test/pytorch/tests/test_trainer.py @@ -22,10 +22,7 @@ import pytest import torch -from torch.utils.data import DataLoader, TensorDataset from pytorch_lightning import LightningModule -from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin -from pytorch_lightning.plugins.precision.double import DoublePrecisionPlugin from test.pytorch.utils._train_torch_lightning import create_data_loader, data_transform from test.pytorch.utils._train_torch_lightning import train_with_linear_top_layer from torch import nn @@ -81,26 +78,6 @@ def test_trainer_compile(self): pl_model = Trainer.compile(self.model, self.loss, self.optimizer) trainer.fit(pl_model, self.train_loader) - def test_trainer_precision_bf16(self): - model = ResNet18(10, pretrained=False, include_top=False, freeze=True) - loss = nn.CrossEntropyLoss() - optimizer = torch.optim.Adam(model.parameters(), lr=0.01) - pl_model = Trainer.compile(model, loss, optimizer) - if TORCH_VERSION_LESS_1_10: - trainer = Trainer(max_epochs=1, precision=64) - trainer.fit(pl_model, self.train_loader) - assert isinstance(trainer.strategy.precision_plugin, DoublePrecisionPlugin) - assert optimizer.param_groups[0]['params'][0].dtype is torch.float64 - else: - trainer = Trainer(max_epochs=1, precision='bf16') - trainer.fit(pl_model, self.train_loader) - assert isinstance(trainer.strategy.precision_plugin, NativeMixedPrecisionPlugin) - # model is 
not converted to bfloat16 precision - input = TensorDataset(torch.rand(1, 3, 32, 32)) - train_loader = DataLoader(input) - y_hat = trainer.predict(pl_model, train_loader) - assert y_hat[0].dtype is torch.bfloat16 - def test_trainer_save_load(self): trainer = Trainer(max_epochs=1) pl_model = Trainer.compile(self.model, self.loss, self.optimizer) diff --git a/python/nano/test/pytorch/tests/test_trainer_precision.py b/python/nano/test/pytorch/tests/test_trainer_precision.py new file mode 100644 index 00000000000..790304f83d6 --- /dev/null +++ b/python/nano/test/pytorch/tests/test_trainer_precision.py @@ -0,0 +1,67 @@ +import os +from unittest import TestCase + +import pytest +import torch +from torch import nn +from torch.utils.data import DataLoader, TensorDataset +from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin +from pytorch_lightning.plugins.precision.double import DoublePrecisionPlugin + +from bigdl.nano.pytorch import Trainer +from bigdl.nano.pytorch.vision.models import vision +from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 + +from test.pytorch.tests.test_scale_lr import ResNetBase +from test.pytorch.utils._train_torch_lightning import (create_data_loader, + create_test_data_loader, + data_transform) + +batch_size = 32 +dataset_size = 256 +num_workers = 0 +data_dir = os.path.join(os.path.dirname(__file__), "../data") + + +class ResNet18(nn.Module): + def __init__(self, num_classes, pretrained=True, include_top=False, freeze=True): + super().__init__() + backbone = vision.resnet18(pretrained=pretrained, include_top=include_top, freeze=freeze) + output_size = backbone.get_output_size() + head = nn.Linear(output_size, num_classes) + self.model = nn.Sequential(backbone, head) + + def forward(self, x): + return self.model(x) + + +class TestTrainer(TestCase): + train_loader = create_data_loader(data_dir, batch_size, num_workers, + data_transform, dataset_size) + test_loader = create_test_data_loader(data_dir, batch_size, num_workers, + data_transform, dataset_size) + + def test_trainer_precision(self): + model = ResNet18(10, pretrained=False, include_top=False, freeze=True) + loss = nn.CrossEntropyLoss() + optimizer = torch.optim.Adam(model.parameters(), lr=0.01) + pl_model = Trainer.compile(model, loss, optimizer) + if TORCH_VERSION_LESS_1_10: + trainer = Trainer(max_epochs=4, precision=64) + trainer.fit(pl_model, self.train_loader) + assert isinstance(trainer.strategy.precision_plugin, DoublePrecisionPlugin) + for opt in pl_model.optimizers(): + assert opt.param_groups[0]['params'][0].dtype is torch.float64 + else: + trainer = Trainer(max_epochs=4, precision='bf16') + trainer.fit(pl_model, self.train_loader) + assert isinstance(trainer.strategy.precision_plugin, NativeMixedPrecisionPlugin) + # model is not converted to bfloat16 precision + input = TensorDataset(torch.rand(1, 3, 32, 32)) + train_loader = DataLoader(input) + y_hat = trainer.predict(pl_model, train_loader) + assert y_hat[0].dtype is torch.bfloat16 + + +if __name__ == '__main__': + pytest.main([__file__]) From d58f61657d5f9974063ea982213e40bea8d9ad94 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 04:47:33 +0000 Subject: [PATCH 15/32] Update --- .../pytorch/tests/test_trainer_precision.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/python/nano/test/pytorch/tests/test_trainer_precision.py b/python/nano/test/pytorch/tests/test_trainer_precision.py index 790304f83d6..9ebaa291b7a 100644 --- 
a/python/nano/test/pytorch/tests/test_trainer_precision.py +++ b/python/nano/test/pytorch/tests/test_trainer_precision.py @@ -1,3 +1,20 @@ +# +# Copyright 2016 The BigDL Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + import os from unittest import TestCase From 4f0b04cc01ed1b43811bc7411f4e512b87a4e943 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 05:19:52 +0000 Subject: [PATCH 16/32] Update --- python/nano/test/pytorch/tests/test_trainer_precision.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/nano/test/pytorch/tests/test_trainer_precision.py b/python/nano/test/pytorch/tests/test_trainer_precision.py index 9ebaa291b7a..31e7ad7ca4a 100644 --- a/python/nano/test/pytorch/tests/test_trainer_precision.py +++ b/python/nano/test/pytorch/tests/test_trainer_precision.py @@ -67,8 +67,8 @@ def test_trainer_precision(self): trainer = Trainer(max_epochs=4, precision=64) trainer.fit(pl_model, self.train_loader) assert isinstance(trainer.strategy.precision_plugin, DoublePrecisionPlugin) - for opt in pl_model.optimizers(): - assert opt.param_groups[0]['params'][0].dtype is torch.float64 + opt = pl_model.optimizers() + assert opt.param_groups[0]['params'][0].dtype is torch.float64 else: trainer = Trainer(max_epochs=4, precision='bf16') trainer.fit(pl_model, self.train_loader) From df09ca691b9f8d46482556d05ac650fdbbafe6e4 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 06:36:17 +0000 Subject: [PATCH 17/32] Update --- python/nano/test/pytorch/tests/test_plugin_ipex.py | 5 +++++ python/nano/test/pytorch/tests/test_trainer.py | 4 ++-- python/nano/test/pytorch/tests/test_trainer_ipex.py | 7 +++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/python/nano/test/pytorch/tests/test_plugin_ipex.py b/python/nano/test/pytorch/tests/test_plugin_ipex.py index a1826c15d00..31b6c3659f6 100644 --- a/python/nano/test/pytorch/tests/test_plugin_ipex.py +++ b/python/nano/test/pytorch/tests/test_plugin_ipex.py @@ -24,6 +24,8 @@ from bigdl.nano.pytorch.lightning import LightningModule from bigdl.nano.pytorch import Trainer +from bigdl.nano.common import check_avx512 +from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 from test.pytorch.utils._train_torch_lightning import create_data_loader, data_transform from test.pytorch.utils._train_torch_lightning import create_test_data_loader @@ -65,6 +67,9 @@ def test_trainer_subprocess_plugin(self): trainer.test(pl_model, self.test_data_loader) def test_trainer_subprocess_plugin_bf16(self): + # IPEX BF16 weight prepack needs the cpu support avx512bw, avx512vl and avx512dq + if not TORCH_VERSION_LESS_1_10 and not check_avx512(): + return model = ResNet18(pretrained=False, include_top=False, freeze=True) loss = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=0.01) diff --git a/python/nano/test/pytorch/tests/test_trainer.py b/python/nano/test/pytorch/tests/test_trainer.py index d66d6faceea..614c44ef348 100644 --- 
a/python/nano/test/pytorch/tests/test_trainer.py +++ b/python/nano/test/pytorch/tests/test_trainer.py @@ -26,10 +26,10 @@ from test.pytorch.utils._train_torch_lightning import create_data_loader, data_transform from test.pytorch.utils._train_torch_lightning import train_with_linear_top_layer from torch import nn +import torchmetrics from bigdl.nano.pytorch import Trainer from bigdl.nano.pytorch.vision.models import vision -from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 batch_size = 256 num_workers = 0 @@ -96,7 +96,7 @@ def test_trainer_save_load(self): for k in original_state_dict.keys(): assert (original_state_dict[k] == loaded_state_dict[k]).all() shutil.rmtree('saved_model') - + if __name__ == '__main__': pytest.main([__file__]) diff --git a/python/nano/test/pytorch/tests/test_trainer_ipex.py b/python/nano/test/pytorch/tests/test_trainer_ipex.py index 897397a08cc..f608ba32985 100644 --- a/python/nano/test/pytorch/tests/test_trainer_ipex.py +++ b/python/nano/test/pytorch/tests/test_trainer_ipex.py @@ -28,6 +28,7 @@ from bigdl.nano.pytorch import Trainer from bigdl.nano.pytorch.vision.models import vision from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 +from bigdl.nano.common import check_avx512 batch_size = 256 max_epochs = 2 @@ -69,6 +70,9 @@ def test_trainer_save_checkpoint(self): trainer.fit(pl_model, self.train_loader) def test_trainer_ipex_bf16(self): + # IPEX BF16 weight prepack needs the cpu support avx512bw, avx512vl and avx512dq + if not TORCH_VERSION_LESS_1_10 and not check_avx512(): + return trainer = Trainer(max_epochs=max_epochs, use_ipex=True, precision="bf16", callbacks=[CheckIPEXFusedStepCallback()]) @@ -97,6 +101,9 @@ def test_trainer_ipex_bf16(self): ipex.enable_auto_mixed_precision(None) def test_trainer_ipex_bf16_unspport_optim(self): + # IPEX BF16 weight prepack needs the cpu support avx512bw, avx512vl and avx512dq + if not TORCH_VERSION_LESS_1_10 and not check_avx512(): + return trainer = Trainer(max_epochs=max_epochs, use_ipex=True, precision="bf16", callbacks=[CheckIPEXFusedStepCallback()]) From a0c7e33b0745103bce5c1e9453e1006a4ae57b46 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 07:08:59 +0000 Subject: [PATCH 18/32] Update --- python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py | 4 ++++ python/nano/test/pytorch/tests/test_plugin_ipex.py | 2 +- python/nano/test/pytorch/tests/test_trainer_ipex.py | 4 ++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index 9f70187267f..5827a7855f3 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -107,6 +107,10 @@ def __init__(self, num_processes: int = 1, " without avx512 will crash." 
"Fall back to regular pytorch.") use_ipex = False + # torch must be greater or equal to 1.10 to use bfloat16 without ipex + if TORCH_VERSION_LESS_1_10: + if kwargs.get('precision', None) == "bf16": + kwargs['precision'] = 32 self.use_ipex = use_ipex diff --git a/python/nano/test/pytorch/tests/test_plugin_ipex.py b/python/nano/test/pytorch/tests/test_plugin_ipex.py index 31b6c3659f6..cb2aa663639 100644 --- a/python/nano/test/pytorch/tests/test_plugin_ipex.py +++ b/python/nano/test/pytorch/tests/test_plugin_ipex.py @@ -68,7 +68,7 @@ def test_trainer_subprocess_plugin(self): def test_trainer_subprocess_plugin_bf16(self): # IPEX BF16 weight prepack needs the cpu support avx512bw, avx512vl and avx512dq - if not TORCH_VERSION_LESS_1_10 and not check_avx512(): + if not check_avx512(): return model = ResNet18(pretrained=False, include_top=False, freeze=True) loss = nn.CrossEntropyLoss() diff --git a/python/nano/test/pytorch/tests/test_trainer_ipex.py b/python/nano/test/pytorch/tests/test_trainer_ipex.py index f608ba32985..aa7178dc415 100644 --- a/python/nano/test/pytorch/tests/test_trainer_ipex.py +++ b/python/nano/test/pytorch/tests/test_trainer_ipex.py @@ -71,7 +71,7 @@ def test_trainer_save_checkpoint(self): def test_trainer_ipex_bf16(self): # IPEX BF16 weight prepack needs the cpu support avx512bw, avx512vl and avx512dq - if not TORCH_VERSION_LESS_1_10 and not check_avx512(): + if not check_avx512(): return trainer = Trainer(max_epochs=max_epochs, use_ipex=True, precision="bf16", callbacks=[CheckIPEXFusedStepCallback()]) @@ -102,7 +102,7 @@ def test_trainer_ipex_bf16(self): def test_trainer_ipex_bf16_unspport_optim(self): # IPEX BF16 weight prepack needs the cpu support avx512bw, avx512vl and avx512dq - if not TORCH_VERSION_LESS_1_10 and not check_avx512(): + if not check_avx512(): return trainer = Trainer(max_epochs=max_epochs, use_ipex=True, precision="bf16", callbacks=[CheckIPEXFusedStepCallback()]) From 3614bd5f731b31ebdb9fc12002f3e3e2db96160a Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 09:27:21 +0000 Subject: [PATCH 19/32] Update --- .../src/bigdl/nano/pytorch/trainer/Trainer.py | 37 +++++++++---------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index 5827a7855f3..40126b98977 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -102,27 +102,24 @@ def __init__(self, num_processes: int = 1, else: kwargs["callbacks"] = [ChannelsLastCallback()] - if TORCH_VERSION_LESS_1_11 and use_ipex and not check_avx512(): - warning("Enable ipex<=1.10 in a cpu instruction set" - " without avx512 will crash." 
- "Fall back to regular pytorch.") - use_ipex = False - # torch must be greater or equal to 1.10 to use bfloat16 without ipex - if TORCH_VERSION_LESS_1_10: - if kwargs.get('precision', None) == "bf16": - kwargs['precision'] = 32 - self.use_ipex = use_ipex - - enable_bf16 = False - - # enable precision plugin for IPEX BF16 - if self.use_ipex and kwargs.get('precision', None) == "bf16": - enable_bf16 = True - # No need to set `precision` because strategy has higher priority - # than accelerator/plugin - if TORCH_VERSION_LESS_1_10: - kwargs['precision'] = 32 + enable_bf16 = self.use_ipex and kwargs.get('precision', None) == 'bf16' + + if self.use_ipex and not check_avx512(): + if TORCH_VERSION_LESS_1_11: + warning("Enable ipex<=1.10 in a cpu instruction set" + " without avx512 will crash." + "Fall back to regular pytorch.") + self.use_ipex = False + if TORCH_VERSION_LESS_1_10 and enable_bf16: + warning("torch must be greater or equal to 1.10 to use bfloat16 without ipex." + "Will use 32-bit precision") + kwargs['precision'] = 32 + elif enable_bf16: + warning("Enable IPEX bfloat16 in a cpu instruction set" + " without avx512 will crash. " + "Will use PyTorch Lightning BFloat16 Mixed Precision") + enable_bf16 = False if num_processes == 1: from bigdl.nano.pytorch.strategies import create_IPEXStrategy From 61f1d2a293548304c72a1fabbb06215e392cfcd6 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 09:48:13 +0000 Subject: [PATCH 20/32] Update --- python/nano/test/pytorch/tests/test_bf16_ipex.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/nano/test/pytorch/tests/test_bf16_ipex.py b/python/nano/test/pytorch/tests/test_bf16_ipex.py index 38e45ce165d..7eb27feb3d1 100644 --- a/python/nano/test/pytorch/tests/test_bf16_ipex.py +++ b/python/nano/test/pytorch/tests/test_bf16_ipex.py @@ -20,6 +20,7 @@ from torchvision.models.resnet import resnet18 from unittest.mock import MagicMock, Mock, PropertyMock, patch from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10, TORCH_VERSION_LESS_1_12 +from bigdl.nano.common import check_avx512 class Pytorch1_9: @@ -40,6 +41,8 @@ def test_bf16_common(self): """ Debug mode. Allow run bf16 forward without bf16 instruction support. """ + if not check_avx512(): + return trainer = Trainer(max_epochs=1) model = resnet18(num_classes=10) From 3b957a4b69248eb71adf90d9b7a7fa4a5d95c757 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 09:49:22 +0000 Subject: [PATCH 21/32] Update --- python/nano/test/pytorch/tests/test_bf16_ipex.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/nano/test/pytorch/tests/test_bf16_ipex.py b/python/nano/test/pytorch/tests/test_bf16_ipex.py index 7eb27feb3d1..38e45ce165d 100644 --- a/python/nano/test/pytorch/tests/test_bf16_ipex.py +++ b/python/nano/test/pytorch/tests/test_bf16_ipex.py @@ -20,7 +20,6 @@ from torchvision.models.resnet import resnet18 from unittest.mock import MagicMock, Mock, PropertyMock, patch from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10, TORCH_VERSION_LESS_1_12 -from bigdl.nano.common import check_avx512 class Pytorch1_9: @@ -41,8 +40,6 @@ def test_bf16_common(self): """ Debug mode. Allow run bf16 forward without bf16 instruction support. 
""" - if not check_avx512(): - return trainer = Trainer(max_epochs=1) model = resnet18(num_classes=10) From d055eea2e62aa4d9b9fd0fa1430cbf507caeb018 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 10:52:13 +0000 Subject: [PATCH 22/32] Update --- .../nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py | 7 ++++++- python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py | 9 +++++---- python/nano/test/pytorch/tests/test_plugin_ipex.py | 5 +++++ 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py b/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py index 94e10ff30a3..c476efe5428 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py @@ -180,7 +180,12 @@ def __init__( if use_ipex and TORCH_VERSION_LESS_1_10 and 'accelerator' not in kwargs: super().__init__(accelerator=create_IPEXAccelerator(), parallel_devices=parallel_devices, - cluster_environment=cluster_environment, **kwargs) + cluster_environment=cluster_environment, + **kwargs) + if enable_bf16: + import intel_pytorch_extension as ipex + # Automatically mix precision + ipex.enable_auto_mixed_precision(mixed_dtype=torch.bfloat16) elif use_ipex and enable_bf16 and 'precision_plugin' not in kwargs: from bigdl.nano.pytorch.strategies.ipex.ipex_strategy import IPEXBF16Precision super().__init__(parallel_devices=parallel_devices, diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index 40126b98977..6f58a33a978 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -105,16 +105,17 @@ def __init__(self, num_processes: int = 1, self.use_ipex = use_ipex enable_bf16 = self.use_ipex and kwargs.get('precision', None) == 'bf16' + # No need to set precision for torch greater or equal to 1.10, + # because strategy > accelerator/precision/plugin + if TORCH_VERSION_LESS_1_10 and enable_bf16: + kwargs['precision'] = 32 + if self.use_ipex and not check_avx512(): if TORCH_VERSION_LESS_1_11: warning("Enable ipex<=1.10 in a cpu instruction set" " without avx512 will crash." "Fall back to regular pytorch.") self.use_ipex = False - if TORCH_VERSION_LESS_1_10 and enable_bf16: - warning("torch must be greater or equal to 1.10 to use bfloat16 without ipex." - "Will use 32-bit precision") - kwargs['precision'] = 32 elif enable_bf16: warning("Enable IPEX bfloat16 in a cpu instruction set" " without avx512 will crash. 
" diff --git a/python/nano/test/pytorch/tests/test_plugin_ipex.py b/python/nano/test/pytorch/tests/test_plugin_ipex.py index cb2aa663639..13e1b572664 100644 --- a/python/nano/test/pytorch/tests/test_plugin_ipex.py +++ b/python/nano/test/pytorch/tests/test_plugin_ipex.py @@ -82,6 +82,11 @@ def test_trainer_subprocess_plugin_bf16(self): callbacks=[CheckIPEXCallback(), CheckIPEXFusedStepCallback()]) trainer.fit(pl_model, self.data_loader, self.test_data_loader) trainer.test(pl_model, self.test_data_loader) + if TORCH_VERSION_LESS_1_10: + import intel_pytorch_extension as ipex + # Diable IPEX AMP + # Avoid affecting other tests + ipex.enable_auto_mixed_precision(None) if __name__ == '__main__': From 45d8b8bb70d9b1cc932491e60bb04eb7c782717d Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 13:22:40 +0000 Subject: [PATCH 23/32] Update --- python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index 6f58a33a978..fdb34ae6341 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -105,8 +105,8 @@ def __init__(self, num_processes: int = 1, self.use_ipex = use_ipex enable_bf16 = self.use_ipex and kwargs.get('precision', None) == 'bf16' - # No need to set precision for torch greater or equal to 1.10, - # because strategy > accelerator/precision/plugin + # Set 'precision' for strategy without precision_plugin, + # Strategy > accelerator/precision/plugin if TORCH_VERSION_LESS_1_10 and enable_bf16: kwargs['precision'] = 32 From 173021716eab513fc037c251b3f519461863b501 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Mon, 15 Aug 2022 13:26:23 +0000 Subject: [PATCH 24/32] Update --- python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index fdb34ae6341..dfcde15193e 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -105,7 +105,7 @@ def __init__(self, num_processes: int = 1, self.use_ipex = use_ipex enable_bf16 = self.use_ipex and kwargs.get('precision', None) == 'bf16' - # Set 'precision' for strategy without precision_plugin, + # Set 'precision' for strategy without precision_plugin, # Strategy > accelerator/precision/plugin if TORCH_VERSION_LESS_1_10 and enable_bf16: kwargs['precision'] = 32 From 27037dc6361180b54b7c6b1820f4fe8a2d35d41d Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Tue, 16 Aug 2022 00:24:29 +0000 Subject: [PATCH 25/32] reduce ut time and re-run action --- .../src/bigdl/nano/pytorch/trainer/Trainer.py | 3 ++- .../test/pytorch/tests/test_plugin_ipex.py | 4 +--- .../test/pytorch/tests/test_trainer_ipex.py | 18 +++++++----------- .../pytorch/tests/test_trainer_precision.py | 5 +++-- .../test/pytorch/utils/_train_ipex_callback.py | 7 ++++++- 5 files changed, 19 insertions(+), 18 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index dfcde15193e..9160ef6f938 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -107,6 +107,7 @@ def __init__(self, num_processes: int = 1, # Set 'precision' for strategy without 
precision_plugin, # Strategy > accelerator/precision/plugin + # torch must be greater or equal to 1.10 to use natice amp for bfloat16 precision if TORCH_VERSION_LESS_1_10 and enable_bf16: kwargs['precision'] = 32 @@ -119,7 +120,7 @@ def __init__(self, num_processes: int = 1, elif enable_bf16: warning("Enable IPEX bfloat16 in a cpu instruction set" " without avx512 will crash. " - "Will use PyTorch Lightning BFloat16 Mixed Precision") + "Will use PyTorch Lightning Native AMP for BFloat16 precision") enable_bf16 = False if num_processes == 1: diff --git a/python/nano/test/pytorch/tests/test_plugin_ipex.py b/python/nano/test/pytorch/tests/test_plugin_ipex.py index 13e1b572664..0228370ef4a 100644 --- a/python/nano/test/pytorch/tests/test_plugin_ipex.py +++ b/python/nano/test/pytorch/tests/test_plugin_ipex.py @@ -68,8 +68,6 @@ def test_trainer_subprocess_plugin(self): def test_trainer_subprocess_plugin_bf16(self): # IPEX BF16 weight prepack needs the cpu support avx512bw, avx512vl and avx512dq - if not check_avx512(): - return model = ResNet18(pretrained=False, include_top=False, freeze=True) loss = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=0.01) @@ -82,7 +80,7 @@ def test_trainer_subprocess_plugin_bf16(self): callbacks=[CheckIPEXCallback(), CheckIPEXFusedStepCallback()]) trainer.fit(pl_model, self.data_loader, self.test_data_loader) trainer.test(pl_model, self.test_data_loader) - if TORCH_VERSION_LESS_1_10: + if trainer.use_ipex and TORCH_VERSION_LESS_1_10: import intel_pytorch_extension as ipex # Diable IPEX AMP # Avoid affecting other tests diff --git a/python/nano/test/pytorch/tests/test_trainer_ipex.py b/python/nano/test/pytorch/tests/test_trainer_ipex.py index aa7178dc415..74f4e23fd0d 100644 --- a/python/nano/test/pytorch/tests/test_trainer_ipex.py +++ b/python/nano/test/pytorch/tests/test_trainer_ipex.py @@ -55,11 +55,11 @@ class TestTrainer(TestCase): optimizer = torch.optim.Adam(model.parameters(), lr=0.01) scheduler_dict = { "scheduler": OneCycleLR( - optimizer, - 0.1, - epochs=max_epochs, - steps_per_epoch=len(train_loader), - ), + optimizer, + 0.1, + epochs=max_epochs, + steps_per_epoch=len(train_loader), + ), "interval": "step", } @@ -71,8 +71,6 @@ def test_trainer_save_checkpoint(self): def test_trainer_ipex_bf16(self): # IPEX BF16 weight prepack needs the cpu support avx512bw, avx512vl and avx512dq - if not check_avx512(): - return trainer = Trainer(max_epochs=max_epochs, use_ipex=True, precision="bf16", callbacks=[CheckIPEXFusedStepCallback()]) @@ -94,7 +92,7 @@ def test_trainer_ipex_bf16(self): trainer.fit(pl_model, self.train_loader) trainer.test(pl_model, self.train_loader) - if TORCH_VERSION_LESS_1_10: + if trainer.use_ipex and TORCH_VERSION_LESS_1_10: import intel_pytorch_extension as ipex # Diable IPEX AMP # Avoid affecting other tests @@ -102,8 +100,6 @@ def test_trainer_ipex_bf16(self): def test_trainer_ipex_bf16_unspport_optim(self): # IPEX BF16 weight prepack needs the cpu support avx512bw, avx512vl and avx512dq - if not check_avx512(): - return trainer = Trainer(max_epochs=max_epochs, use_ipex=True, precision="bf16", callbacks=[CheckIPEXFusedStepCallback()]) @@ -124,7 +120,7 @@ def test_trainer_ipex_bf16_unspport_optim(self): trainer.fit(pl_model, self.train_loader) trainer.test(pl_model, self.train_loader) - if TORCH_VERSION_LESS_1_10: + if trainer.use_ipex and TORCH_VERSION_LESS_1_10: import intel_pytorch_extension as ipex # Diable IPEX AMP # Avoid affecting other tests diff --git 
a/python/nano/test/pytorch/tests/test_trainer_precision.py b/python/nano/test/pytorch/tests/test_trainer_precision.py index 31e7ad7ca4a..d30b2a9c3d9 100644 --- a/python/nano/test/pytorch/tests/test_trainer_precision.py +++ b/python/nano/test/pytorch/tests/test_trainer_precision.py @@ -63,14 +63,15 @@ def test_trainer_precision(self): loss = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=0.01) pl_model = Trainer.compile(model, loss, optimizer) + # torch must be greater or euqal to 1.10 to use native amp for bfloat16 precision if TORCH_VERSION_LESS_1_10: - trainer = Trainer(max_epochs=4, precision=64) + trainer = Trainer(max_epochs=2, precision=64) trainer.fit(pl_model, self.train_loader) assert isinstance(trainer.strategy.precision_plugin, DoublePrecisionPlugin) opt = pl_model.optimizers() assert opt.param_groups[0]['params'][0].dtype is torch.float64 else: - trainer = Trainer(max_epochs=4, precision='bf16') + trainer = Trainer(max_epochs=2, precision='bf16') trainer.fit(pl_model, self.train_loader) assert isinstance(trainer.strategy.precision_plugin, NativeMixedPrecisionPlugin) # model is not converted to bfloat16 precision diff --git a/python/nano/test/pytorch/utils/_train_ipex_callback.py b/python/nano/test/pytorch/utils/_train_ipex_callback.py index 684289c8606..1b972108892 100644 --- a/python/nano/test/pytorch/utils/_train_ipex_callback.py +++ b/python/nano/test/pytorch/utils/_train_ipex_callback.py @@ -20,9 +20,11 @@ import pytorch_lightning as pl from pytorch_lightning.callbacks import Callback from pytorch_lightning.plugins.training_type import SingleDevicePlugin, DDPSpawnPlugin -from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 from pytorch_lightning.accelerators.cpu import CPUAccelerator +from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10 +from bigdl.nano.common import check_avx512 + class CheckIPEXCallback(Callback): def on_train_start(self, trainer, pl_module): @@ -76,6 +78,9 @@ def check_ipex_layers(m): class CheckIPEXFusedStepCallback(Callback): def on_train_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule"): + if not check_avx512(): + # IPEX BF16 weight prepack needs the cpu support avx512bw, avx512vl and avx512dq + return if not TORCH_VERSION_LESS_1_10: from intel_extension_for_pytorch.optim._optimizer_utils import IPEX_FUSED_OPTIMIZER_LIST # IPEX only support one optimizer From 070fe97bd690f5fa38ecafe7bf461a4557746557 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Tue, 16 Aug 2022 01:33:49 +0000 Subject: [PATCH 26/32] track avx512 --- python/nano/test/pytorch/tests/test_plugin_ipex.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/nano/test/pytorch/tests/test_plugin_ipex.py b/python/nano/test/pytorch/tests/test_plugin_ipex.py index 0228370ef4a..7d9015aca7d 100644 --- a/python/nano/test/pytorch/tests/test_plugin_ipex.py +++ b/python/nano/test/pytorch/tests/test_plugin_ipex.py @@ -66,7 +66,7 @@ def test_trainer_subprocess_plugin(self): trainer.fit(pl_model, self.data_loader, self.test_data_loader) trainer.test(pl_model, self.test_data_loader) - def test_trainer_subprocess_plugin_bf16(self): + def test_trainer_spawn_plugin_bf16(self): # IPEX BF16 weight prepack needs the cpu support avx512bw, avx512vl and avx512dq model = ResNet18(pretrained=False, include_top=False, freeze=True) loss = nn.CrossEntropyLoss() @@ -75,7 +75,7 @@ def test_trainer_subprocess_plugin_bf16(self): model, loss, optimizer, metrics=[torchmetrics.F1(num_classes), torchmetrics.Accuracy(num_classes=10)] ) - 
trainer = Trainer(num_processes=2, distributed_backend="subprocess", + trainer = Trainer(num_processes=2, distributed_backend="spawn", max_epochs=4, use_ipex=True, precision="bf16", callbacks=[CheckIPEXCallback(), CheckIPEXFusedStepCallback()]) trainer.fit(pl_model, self.data_loader, self.test_data_loader) From c5dd357edce3164820cdc034b73281ff11ea559e Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Wed, 17 Aug 2022 05:29:05 +0000 Subject: [PATCH 27/32] Update lite bf16 training --- .../pytorch/strategies/ipex/ipex_strategy.py | 8 ++++-- .../nano/src/bigdl/nano/pytorch/torch_nano.py | 27 +++++++++++++------ .../src/bigdl/nano/pytorch/trainer/Trainer.py | 2 +- .../pytorch/tests/test_torch_nano_ipex.py | 19 +++++++++++-- 4 files changed, 43 insertions(+), 13 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py index 28166f72e8f..fe8343964dd 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py @@ -27,7 +27,8 @@ import pytorch_lightning as pl from pytorch_lightning.strategies import SingleDeviceStrategy from pytorch_lightning.accelerators.accelerator import Accelerator -from pytorch_lightning.plugins.precision import PrecisionPlugin, NativeMixedPrecisionPlugin +from pytorch_lightning.plugins.precision import PrecisionPlugin, MixedPrecisionPlugin +from pytorch_lightning.utilities import AMPType from bigdl.nano.utils.log4Error import invalidInputError import intel_extension_for_pytorch as ipex @@ -78,9 +79,12 @@ def setup(self, trainer: pl.Trainer) -> None: invalidInputError(False, "Ipex does not support more than one optimizers.") -class IPEXBF16Precision(PrecisionPlugin): +class IPEXBF16Precision(MixedPrecisionPlugin): """Create Precision Plugin for IPEX BFloat16.""" + backend: "AMPType" = AMPType.NATIVE + precision: Union[str, int] = 'bf16' + @contextmanager def forward_context(self): """AMP for managing model forward/training_step/evaluation_step/predict_step.""" diff --git a/python/nano/src/bigdl/nano/pytorch/torch_nano.py b/python/nano/src/bigdl/nano/pytorch/torch_nano.py index eee05e66142..de99d06844a 100644 --- a/python/nano/src/bigdl/nano/pytorch/torch_nano.py +++ b/python/nano/src/bigdl/nano/pytorch/torch_nano.py @@ -52,7 +52,6 @@ class TorchNano(LightningLite): def __init__(self, num_processes: int = 1, use_ipex: bool = False, - enable_bf16: bool = False, strategy: str = "subprocess", *args, **kwargs) -> None: """ @@ -66,13 +65,25 @@ def __init__(self, num_processes: int = 1, """ self.num_processes = num_processes self.use_ipex = use_ipex - self.enable_bf16 = enable_bf16 - - if TORCH_VERSION_LESS_1_11 and use_ipex and not check_avx512(): - warning("Enable ipex<=1.10 in a cpu instruction set" - " without avx512 will crash." - "Fall back to regular pytorch.") - self.use_ipex = False + self.enable_bf16 = self.use_ipex and kwargs.get('precision', None) == 'bf16' + + # Set 'precision' for strategy without precision_plugin, + # Strategy > accelerator/precision/plugin + # torch must be greater or equal to 1.10 to use native amp for bfloat16 precision + if TORCH_VERSION_LESS_1_10 and enable_bf16: + kwargs['precision'] = 32 + + if self.use_ipex and not check_avx512(): + if TORCH_VERSION_LESS_1_11: + warning("Enable ipex<=1.10 in a cpu instruction set" + " without avx512 will crash." 
+ "Fall back to regular pytorch.") + self.use_ipex = False + elif enable_bf16: + warning("Enable IPEX bfloat16 in a cpu instruction set" + " without avx512 will crash. " + "Will use PyTorch Lightning Native AMP for BFloat16 precision") + enable_bf16 = False if self.num_processes == 1: if self.use_ipex: diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index 9160ef6f938..0e32b08a622 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -107,7 +107,7 @@ def __init__(self, num_processes: int = 1, # Set 'precision' for strategy without precision_plugin, # Strategy > accelerator/precision/plugin - # torch must be greater or equal to 1.10 to use natice amp for bfloat16 precision + # torch must be greater or equal to 1.10 to use native amp for bfloat16 precision if TORCH_VERSION_LESS_1_10 and enable_bf16: kwargs['precision'] = 32 diff --git a/python/nano/test/pytorch/tests/test_torch_nano_ipex.py b/python/nano/test/pytorch/tests/test_torch_nano_ipex.py index 3c01e1e639e..8d6b9ac4862 100644 --- a/python/nano/test/pytorch/tests/test_torch_nano_ipex.py +++ b/python/nano/test/pytorch/tests/test_torch_nano_ipex.py @@ -45,10 +45,13 @@ def forward(self, x): class MyNano(TorchNano): - def train(self): + def train(self, optimizer_supported: bool = False): model = ResNet18(10, pretrained=False, include_top=False, freeze=True) loss_func = nn.CrossEntropyLoss() - optimizer = torch.optim.Adam(model.parameters(), lr=0.01) + if optimizer_supported: + optimizer = torch.optim.SGD(model.parameters, lr=0.01) + else: + optimizer = torch.optim.Adam(model.parameters(), lr=0.01) train_loader = create_data_loader(data_dir, batch_size, num_workers, data_transform) model, optimizer, train_loader = self.setup(model, optimizer, train_loader) @@ -132,6 +135,18 @@ def test_torch_nano_spawn_correctness(self): def test_torch_nano_subprocess_correctness(self): MyNanoCorrectness(use_ipex=True, num_processes=2, strategy="subprocess").train(0.5) + def test_torch_nano_bf16_support_opt(self): + MyNano(use_ipex=True, precision='bf16').train(optimizer_supported=True) + + def test_torch_nano_bf16_unsupport_opt(self): + MyNano(use_ipex=True, precision='bf16').train() + + def test_torch_nano_bf16_spawn(self): + MyNano(use_ipex=True, precision='bf16', num_processes=2, strategy="spawn").train() + + def test_torch_nano_bf16_subprocess(self): + MyNano(use_ipex=True, precision='bf16', num_processes=2, strategy="subprocess").train() + if __name__ == '__main__': pytest.main([__file__]) From ff461c7c321f8d616573b708ea0127d8fdaba836 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Wed, 17 Aug 2022 07:27:07 +0000 Subject: [PATCH 28/32] Update --- .../nano/src/bigdl/nano/pytorch/torch_nano.py | 22 +++++++++---------- .../pytorch/tests/test_torch_nano_ipex.py | 3 +-- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/torch_nano.py b/python/nano/src/bigdl/nano/pytorch/torch_nano.py index de99d06844a..4c9e9ab8c36 100644 --- a/python/nano/src/bigdl/nano/pytorch/torch_nano.py +++ b/python/nano/src/bigdl/nano/pytorch/torch_nano.py @@ -59,7 +59,6 @@ def __init__(self, num_processes: int = 1, :param num_processes: number of processes in distributed training, defaults to 1 :param use_ipex: whether use ipex acceleration, defaults to False - :param enable_bf16: whether use bf16 acceleration, defaults to False :param strategy: use which backend in distributed mode, 
defaults to "subprocess", \ now avaiable strategies are 'spawn', 'subprocess' and 'ray' """ @@ -70,7 +69,7 @@ def __init__(self, num_processes: int = 1, # Set 'precision' for strategy without precision_plugin, # Strategy > accelerator/precision/plugin # torch must be greater or equal to 1.10 to use native amp for bfloat16 precision - if TORCH_VERSION_LESS_1_10 and enable_bf16: + if TORCH_VERSION_LESS_1_10 and self.enable_bf16: kwargs['precision'] = 32 if self.use_ipex and not check_avx512(): @@ -79,11 +78,11 @@ def __init__(self, num_processes: int = 1, " without avx512 will crash." "Fall back to regular pytorch.") self.use_ipex = False - elif enable_bf16: + elif self.enable_bf16: warning("Enable IPEX bfloat16 in a cpu instruction set" " without avx512 will crash. " "Will use PyTorch Lightning Native AMP for BFloat16 precision") - enable_bf16 = False + self.enable_bf16 = False if self.num_processes == 1: if self.use_ipex: @@ -129,6 +128,14 @@ def _setup( # so we have to add optimizations in this method, which will be called in # user defined `train()` method. + # the following codes are copied from pl's LightningLite's `setup` method, + # ipex 1.9 requires `_move_model_to_device` after `_setup_model_and_optimizers`, but + # pl's `setup` method calls `_move_model_to_device` before `_setup_model_and_optimizers`, + # so we copy the codes and swap their order. + self._validate_setup(model, optimizers) + + model, optimizers = self._strategy._setup_model_and_optimizers(model, list(optimizers)) + # add IPEX 1.11's optimization if self.use_ipex and not TORCH_VERSION_LESS_1_10: dtype = torch.bfloat16 if self.enable_bf16 else None @@ -139,13 +146,6 @@ def _setup( else: invalidInputError(False, "Ipex does not support more than one optimizers.") - # the following codes are copied from pl's LightningLite's `setup` method, - # ipex 1.9 requires `_move_model_to_device` after `_setup_model_and_optimizers`, but - # pl's `setup` method calls `_move_model_to_device` before `_setup_model_and_optimizers`, - # so we copy the codes and swap their order. 
- self._validate_setup(model, optimizers) - - model, optimizers = self._strategy._setup_model_and_optimizers(model, optimizers) if move_to_device: model = self._move_model_to_device(model=model, optimizers=optimizers) model = _TorchNanoModule(model, self._precision_plugin) diff --git a/python/nano/test/pytorch/tests/test_torch_nano_ipex.py b/python/nano/test/pytorch/tests/test_torch_nano_ipex.py index 8d6b9ac4862..d9c25590a7b 100644 --- a/python/nano/test/pytorch/tests/test_torch_nano_ipex.py +++ b/python/nano/test/pytorch/tests/test_torch_nano_ipex.py @@ -49,7 +49,7 @@ def train(self, optimizer_supported: bool = False): model = ResNet18(10, pretrained=False, include_top=False, freeze=True) loss_func = nn.CrossEntropyLoss() if optimizer_supported: - optimizer = torch.optim.SGD(model.parameters, lr=0.01) + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) else: optimizer = torch.optim.Adam(model.parameters(), lr=0.01) train_loader = create_data_loader(data_dir, batch_size, num_workers, data_transform) @@ -66,7 +66,6 @@ def train(self, optimizer_supported: bool = False): loss = loss_func(model(X), y) self.backward(loss) optimizer.step() - total_loss += loss.sum() num += 1 print(f'avg_loss: {total_loss / num}') From 5bcfab09d15c1c624742b9349da584cacba63541 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Thu, 18 Aug 2022 00:45:24 +0000 Subject: [PATCH 29/32] Update --- python/nano/src/bigdl/nano/pytorch/torch_nano.py | 9 ++++++--- python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/torch_nano.py b/python/nano/src/bigdl/nano/pytorch/torch_nano.py index 4c9e9ab8c36..f83edfcfe54 100644 --- a/python/nano/src/bigdl/nano/pytorch/torch_nano.py +++ b/python/nano/src/bigdl/nano/pytorch/torch_nano.py @@ -66,8 +66,8 @@ def __init__(self, num_processes: int = 1, self.use_ipex = use_ipex self.enable_bf16 = self.use_ipex and kwargs.get('precision', None) == 'bf16' - # Set 'precision' for strategy without precision_plugin, - # Strategy > accelerator/precision/plugin + # Strategy has a higher priority than accelerator/precision/plugin, + # set precision for strategy without precision_plugin(e.g. ddp-spawn, ddp-subprocess) # torch must be greater or equal to 1.10 to use native amp for bfloat16 precision if TORCH_VERSION_LESS_1_10 and self.enable_bf16: kwargs['precision'] = 32 @@ -83,6 +83,7 @@ def __init__(self, num_processes: int = 1, " without avx512 will crash. 
" "Will use PyTorch Lightning Native AMP for BFloat16 precision") self.enable_bf16 = False + kwargs['precision'] = 32 if self.num_processes == 1: if self.use_ipex: @@ -136,7 +137,9 @@ def _setup( model, optimizers = self._strategy._setup_model_and_optimizers(model, list(optimizers)) - # add IPEX 1.11's optimization + # IPEX bfloat16 optimization will cast model parameters to `torch.bfloat16` + # which is not supported by ddp currently, + # so add IPEX 1.11's optimization after `_setup_model` if self.use_ipex and not TORCH_VERSION_LESS_1_10: dtype = torch.bfloat16 if self.enable_bf16 else None if len(optimizers) == 0: diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index 0e32b08a622..bab1adc6332 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -105,8 +105,8 @@ def __init__(self, num_processes: int = 1, self.use_ipex = use_ipex enable_bf16 = self.use_ipex and kwargs.get('precision', None) == 'bf16' - # Set 'precision' for strategy without precision_plugin, - # Strategy > accelerator/precision/plugin + # Strategy has a higher priority than accelerator/precision/plugin, + # set precision for strategy without precision_plugin(e.g. ddp-spawn, ddp-subprocess) # torch must be greater or equal to 1.10 to use native amp for bfloat16 precision if TORCH_VERSION_LESS_1_10 and enable_bf16: kwargs['precision'] = 32 @@ -120,8 +120,11 @@ def __init__(self, num_processes: int = 1, elif enable_bf16: warning("Enable IPEX bfloat16 in a cpu instruction set" " without avx512 will crash. " - "Will use PyTorch Lightning Native AMP for BFloat16 precision") + "Using 32-bit precision") enable_bf16 = False + # IPEX-optimized model is incompatible with PL Native AMP, + # so fall back to 32-bit precision instead of staying at bfloat16 precision + kwargs['precision'] = 32 if num_processes == 1: from bigdl.nano.pytorch.strategies import create_IPEXStrategy From 3fad3ba16f1163c5d76876493735e458e3e09a27 Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Tue, 23 Aug 2022 00:30:36 +0000 Subject: [PATCH 30/32] Update bf16 api --- .../bigdl/nano/deps/ray/ray_distributed.py | 10 +++--- .../nano/pytorch/strategies/ddp_spawn.py | 13 ++++--- .../pytorch/strategies/ipex/ipex_strategy.py | 23 ++++++------ .../ipex/version_1_9/ipex_strategy_1_9.py | 4 +-- .../nano/src/bigdl/nano/pytorch/torch_nano.py | 36 +++++++++---------- .../src/bigdl/nano/pytorch/trainer/Trainer.py | 36 ++++++++++--------- 6 files changed, 59 insertions(+), 63 deletions(-) diff --git a/python/nano/src/bigdl/nano/deps/ray/ray_distributed.py b/python/nano/src/bigdl/nano/deps/ray/ray_distributed.py index da16615cb8c..32d7cc7bcb6 100644 --- a/python/nano/src/bigdl/nano/deps/ray/ray_distributed.py +++ b/python/nano/src/bigdl/nano/deps/ray/ray_distributed.py @@ -176,7 +176,7 @@ def __init__(self, num_cpus_per_worker: int = 1, use_gpu: bool = False, use_ipex: bool = False, - enable_bf16: bool = False, + dtype=None, init_hook: Callable = None, auto_lr: Union[bool, dict] = True, **ddp_kwargs: Any): @@ -207,7 +207,7 @@ def __init__(self, self.num_cpus_per_worker = num_cpus_per_worker self.use_gpu = use_gpu self.use_ipex = use_ipex - self.enable_bf16 = enable_bf16 + self.dtype = dtype self.auto_lr = auto_lr invalidInputError(not self.use_gpu or not self.use_ipex, @@ -328,14 +328,12 @@ def _unpack_lightning_optimizer(opt): ] if self.use_ipex and not TORCH_VERSION_LESS_1_10: - dtype = torch.bfloat16 if self.enable_bf16 else 
None num_optimizers = len(self.optimizers) - if num_optimizers == 1: optimizer = self.optimizers[0] - ipex_optimize(self.model, optimizer=optimizer, inplace=True, dtype=dtype) + ipex_optimize(self.model, optimizer=optimizer, inplace=True, dtype=self.dtype) elif num_optimizers == 0: - ipex_optimize(self.model, inplace=True, dtype=dtype) + ipex_optimize(self.model, inplace=True, dtype=self.dtype) else: warnings.warn(f"IPEX currently only support single optimizers, " f"but got {num_optimizers}. Skip IPEX") diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py b/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py index c476efe5428..558e1b58109 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ddp_spawn.py @@ -168,7 +168,7 @@ def __init__( num_processes: int = 1, cpu_for_each_process: Optional[List[List[int]]] = None, use_ipex=False, - enable_bf16=False, + dtype=None, auto_lr=False, **kwargs: Any ): @@ -182,11 +182,11 @@ def __init__( parallel_devices=parallel_devices, cluster_environment=cluster_environment, **kwargs) - if enable_bf16: + if dtype == torch.bfloat16: import intel_pytorch_extension as ipex # Automatically mix precision ipex.enable_auto_mixed_precision(mixed_dtype=torch.bfloat16) - elif use_ipex and enable_bf16 and 'precision_plugin' not in kwargs: + elif use_ipex and dtype == torch.bfloat16 and 'precision_plugin' not in kwargs: from bigdl.nano.pytorch.strategies.ipex.ipex_strategy import IPEXBF16Precision super().__init__(parallel_devices=parallel_devices, cluster_environment=cluster_environment, @@ -197,7 +197,7 @@ def __init__( self.cpu_for_each_process = cpu_for_each_process self.is_distributed = True self.use_ipex = use_ipex - self.enable_bf16 = enable_bf16 + self.dtype = dtype self.auto_lr = auto_lr def _configure_launcher(self): @@ -259,14 +259,13 @@ def _unpack_lightning_optimizer(opt): ] if self.use_ipex and not TORCH_VERSION_LESS_1_10: - dtype = torch.bfloat16 if self.enable_bf16 else None num_optimizers = len(self.optimizers) if num_optimizers == 1: optimizer = self.optimizers[0] - ipex_optimize(self.model, optimizer=optimizer, inplace=True, dtype=dtype) + ipex_optimize(self.model, optimizer=optimizer, inplace=True, dtype=self.dtype) elif num_optimizers == 0: - ipex_optimize(self.model, inplace=True, dtype=dtype) + ipex_optimize(self.model, inplace=True, dtype=self.dtype) else: warnings.warn(f"IPEX currently only support single optimizers, " f"but got {num_optimizers}. Skip IPEX") diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py index fe8343964dd..9b987f51d2e 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/ipex_strategy.py @@ -27,7 +27,7 @@ import pytorch_lightning as pl from pytorch_lightning.strategies import SingleDeviceStrategy from pytorch_lightning.accelerators.accelerator import Accelerator -from pytorch_lightning.plugins.precision import PrecisionPlugin, MixedPrecisionPlugin +from pytorch_lightning.plugins.precision import PrecisionPlugin from pytorch_lightning.utilities import AMPType from bigdl.nano.utils.log4Error import invalidInputError @@ -46,7 +46,7 @@ def __init__( self, accelerator: Accelerator = IPEXAccelerator(), precision_plugin: PrecisionPlugin = PrecisionPlugin(), - enable_bf16=False, + dtype=None, ) -> None: """ Create a IPEXStrategy. 
@@ -54,9 +54,9 @@ def __init__( :param accelerator: the accelerator to handle hardware :param precision_plugin: the plugin to handle precision-specific parts """ - self.enable_bf16 = enable_bf16 + self.dtype = dtype - if enable_bf16 and isinstance(precision_plugin, PrecisionPlugin): + if self.dtype == torch.bfloat16 and isinstance(precision_plugin, PrecisionPlugin): precision_plugin = IPEXBF16Precision() super().__init__(accelerator=accelerator, precision_plugin=precision_plugin) @@ -70,28 +70,22 @@ def setup(self, trainer: pl.Trainer) -> None: """ super().setup(trainer) - dtype = torch.bfloat16 if self.enable_bf16 else None if len(self.optimizers) == 0: - ipex.optimize(self.model, inplace=True, dtype=dtype) + ipex.optimize(self.model, inplace=True, dtype=self.dtype) elif len(self.optimizers) == 1: - ipex.optimize(self.model, optimizer=self.optimizers[0], inplace=True, dtype=dtype) + ipex.optimize(self.model, optimizer=self.optimizers[0], inplace=True, dtype=self.dtype) else: invalidInputError(False, "Ipex does not support more than one optimizers.") -class IPEXBF16Precision(MixedPrecisionPlugin): +class IPEXBF16Precision(PrecisionPlugin): """Create Precision Plugin for IPEX BFloat16.""" - backend: "AMPType" = AMPType.NATIVE precision: Union[str, int] = 'bf16' @contextmanager def forward_context(self): """AMP for managing model forward/training_step/evaluation_step/predict_step.""" - # Using IPEX bf16 and torch.autocast(...) reports a segmentation fault - # in PyTorch 1.11. - # torch.autocast("cpu", args...) is equivalent to torch.cpu.amp.autocast(args...) - # in PyTorch 1.12. with torch.cpu.amp.autocast(): yield @@ -122,6 +116,9 @@ def optimizer_step(self, warning("Seems like you are using a custom optimizer," "please make sure that 'optimizer.step(closure)'" " does not need to be called in training stage") + + # For optimizer not in IPEX_FUSED_OPTIMIZER_LIST, + # `closure()` needs to be called to backward the loss to avoid `.grad` being None closure_result = closure() optimizer.step(**kwargs) diff --git a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/version_1_9/ipex_strategy_1_9.py b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/version_1_9/ipex_strategy_1_9.py index 71d3b72dd44..d7d93a32351 100644 --- a/python/nano/src/bigdl/nano/pytorch/strategies/ipex/version_1_9/ipex_strategy_1_9.py +++ b/python/nano/src/bigdl/nano/pytorch/strategies/ipex/version_1_9/ipex_strategy_1_9.py @@ -39,7 +39,7 @@ def __init__( self, accelerator: Accelerator = IPEXAccelerator(), # type: ignore precision_plugin: PrecisionPlugin = PrecisionPlugin(), - enable_bf16=False, + dtype=None, ) -> None: """ Create a IPEXStrategy. @@ -47,7 +47,7 @@ def __init__( :param accelerator: the accelerator to handle hardware :param precision_plugin: the plugin to handle precision-specific parts """ - if enable_bf16: + if dtype == torch.bfloat16: # Automatically mix precision ipex.enable_auto_mixed_precision(mixed_dtype=torch.bfloat16) diff --git a/python/nano/src/bigdl/nano/pytorch/torch_nano.py b/python/nano/src/bigdl/nano/pytorch/torch_nano.py index f83edfcfe54..5cbc5ab62cb 100644 --- a/python/nano/src/bigdl/nano/pytorch/torch_nano.py +++ b/python/nano/src/bigdl/nano/pytorch/torch_nano.py @@ -53,6 +53,7 @@ class TorchNano(LightningLite): def __init__(self, num_processes: int = 1, use_ipex: bool = False, strategy: str = "subprocess", + precision: Union[str, int] = 32, *args, **kwargs) -> None: """ Create a TorchNano with nano acceleration. 
@@ -64,44 +65,44 @@ def __init__(self, num_processes: int = 1, """ self.num_processes = num_processes self.use_ipex = use_ipex - self.enable_bf16 = self.use_ipex and kwargs.get('precision', None) == 'bf16' - - # Strategy has a higher priority than accelerator/precision/plugin, - # set precision for strategy without precision_plugin(e.g. ddp-spawn, ddp-subprocess) - # torch must be greater or equal to 1.10 to use native amp for bfloat16 precision - if TORCH_VERSION_LESS_1_10 and self.enable_bf16: - kwargs['precision'] = 32 + self.dtype = None + if self.use_ipex and precision == 'bf16': + # Enable ipex bfloat16 weight prepack and disable native AMP + self.dtype = torch.float16 + precision = 32 + # Confirm if cpu supports AVX512 if self.use_ipex and not check_avx512(): if TORCH_VERSION_LESS_1_11: warning("Enable ipex<=1.10 in a cpu instruction set" " without avx512 will crash." "Fall back to regular pytorch.") self.use_ipex = False - elif self.enable_bf16: + elif self.dtype == torch.bfloat16: warning("Enable IPEX bfloat16 in a cpu instruction set" " without avx512 will crash. " - "Will use PyTorch Lightning Native AMP for BFloat16 precision") - self.enable_bf16 = False - kwargs['precision'] = 32 + "Using 32-bit precision") + self.dtype = None + + kwargs['precision'] = precision if self.num_processes == 1: if self.use_ipex: - strategy = create_IPEXStrategy(enable_bf16=self.enable_bf16) + strategy = create_IPEXStrategy(dtype=self.dtype) else: strategy = None # type: ignore elif strategy == "spawn": strategy = DDPSpawnStrategy(num_processes=self.num_processes, # type: ignore use_ipex=self.use_ipex, - enable_bf16=self.enable_bf16) + dtype=self.dtype) elif strategy == "subprocess": strategy = DDPSubprocessStrategy(num_processes=self.num_processes, # type: ignore use_ipex=self.use_ipex, - enable_bf16=self.enable_bf16) + dtype=self.dtype) elif strategy == "ray": strategy = create_RayStrategy(num_workers=self.num_processes, use_ipex=self.use_ipex, - enable_bf16=self.enable_bf16) + dtype=self.dtype) else: warning(f"Bigdl-nano doesn't support '{strategy}' strategy now, " f"'{strategy}' strategy of pytorch_lightning will be used. " @@ -141,11 +142,10 @@ def _setup( # which is not supported by ddp currently, # so add IPEX 1.11's optimization after `_setup_model` if self.use_ipex and not TORCH_VERSION_LESS_1_10: - dtype = torch.bfloat16 if self.enable_bf16 else None if len(optimizers) == 0: - ipex_optimize(model, inplace=True, dtype=dtype) + ipex_optimize(model, inplace=True, dtype=self.dtype) elif len(optimizers) == 1: - ipex_optimize(model, optimizer=optimizers[0], inplace=True, dtype=dtype) + ipex_optimize(model, optimizer=optimizers[0], inplace=True, dtype=self.dtype) else: invalidInputError(False, "Ipex does not support more than one optimizers.") diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py index bab1adc6332..ffca917a0d4 100644 --- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py +++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py @@ -62,6 +62,7 @@ def __init__(self, num_processes: int = 1, use_hpo=False, channels_last: bool = False, auto_lr: Union[int, bool] = True, + precision: Union[str, int] = 32, *args: Any, **kwargs: Any) -> None: """ A pytorch lightning trainer that uses bigdl-nano optimization. @@ -71,6 +72,9 @@ def __init__(self, num_processes: int = 1, :param cpu_for_each_process: A list of length `num_processes`, each containing a list of indices of cpus each process will be using. 
default: None, and the cpu will be automatically and evenly distributed among processes. + :param precision: Double precision (64), full precision (32), half precision (16) + or bfloat16 precision (bf16). Enable ipex bfloat16 weight prepack when `use_ipex=True` + and `precision='bf16'` """ # Check keyword arguments if "accelerator" in kwargs: @@ -103,32 +107,30 @@ def __init__(self, num_processes: int = 1, kwargs["callbacks"] = [ChannelsLastCallback()] self.use_ipex = use_ipex - enable_bf16 = self.use_ipex and kwargs.get('precision', None) == 'bf16' - - # Strategy has a higher priority than accelerator/precision/plugin, - # set precision for strategy without precision_plugin(e.g. ddp-spawn, ddp-subprocess) - # torch must be greater or equal to 1.10 to use native amp for bfloat16 precision - if TORCH_VERSION_LESS_1_10 and enable_bf16: - kwargs['precision'] = 32 + dtype = None + if self.use_ipex and precision == 'bf16': + # Enable ipex bfloat16 weight prepack and disable pytorch-lightning native AMP + dtype = torch.bfloat16 + precision = 32 + # Confirm if cpu supports avx512 if self.use_ipex and not check_avx512(): if TORCH_VERSION_LESS_1_11: - warning("Enable ipex<=1.10 in a cpu instruction set" + warning("Enable ipex<=1.11 in a cpu instruction set" " without avx512 will crash." "Fall back to regular pytorch.") self.use_ipex = False - elif enable_bf16: + elif dtype == torch.float16: warning("Enable IPEX bfloat16 in a cpu instruction set" " without avx512 will crash. " "Using 32-bit precision") - enable_bf16 = False - # IPEX-optimized model is incompatible with PL Native AMP, - # so fall back to 32-bit precision instead of staying at bfloat16 precision - kwargs['precision'] = 32 + dtype = None + + kwargs['precision'] = precision if num_processes == 1: from bigdl.nano.pytorch.strategies import create_IPEXStrategy - strategy = create_IPEXStrategy(enable_bf16=enable_bf16) if self.use_ipex else None + strategy = create_IPEXStrategy(dtype=dtype) if self.use_ipex else None kwargs["strategy"] = strategy super().__init__(*args, **kwargs) else: @@ -147,20 +149,20 @@ def __init__(self, num_processes: int = 1, strategy = DDPSpawnStrategy(num_processes=num_processes, cpu_for_each_process=cpu_for_each_process, use_ipex=self.use_ipex, - enable_bf16=enable_bf16, + dtype=dtype, auto_lr=auto_lr) elif distributed_backend == "subprocess": from bigdl.nano.pytorch.strategies import DDPSubprocessStrategy strategy = DDPSubprocessStrategy(num_processes=num_processes, cpu_for_each_process=cpu_for_each_process, use_ipex=self.use_ipex, - enable_bf16=enable_bf16, + dtype=dtype, auto_lr=auto_lr) elif distributed_backend == "ray": from bigdl.nano.pytorch.strategies import create_RayStrategy strategy = create_RayStrategy(num_workers=num_processes, use_ipex=self.use_ipex, - enable_bf16=enable_bf16, + dtype=dtype, auto_lr=auto_lr) kwargs["strategy"] = strategy super().__init__(*args, **kwargs) From 1cd0215ed5731408dd4afcdea58f360ba774977e Mon Sep 17 00:00:00 2001 From: Hu Mingzhi Date: Tue, 23 Aug 2022 00:46:18 +0000 Subject: [PATCH 31/32] Update --- python/nano/src/bigdl/nano/pytorch/torch_nano.py | 5 ++++- python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/python/nano/src/bigdl/nano/pytorch/torch_nano.py b/python/nano/src/bigdl/nano/pytorch/torch_nano.py index 5cbc5ab62cb..2a9721420ff 100644 --- a/python/nano/src/bigdl/nano/pytorch/torch_nano.py +++ b/python/nano/src/bigdl/nano/pytorch/torch_nano.py @@ -62,6 +62,9 @@ def __init__(self, 
num_processes: int = 1,
         self.dtype = None
         if self.use_ipex and precision == 'bf16':
             # Enable ipex bfloat16 weight prepack and disable native AMP
-            self.dtype = torch.float16
+            self.dtype = torch.bfloat16
             precision = 32

         # Confirm if cpu supports AVX512
diff --git a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py
index bc70b9d558e..85cbcecdc4f 100644
--- a/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py
+++ b/python/nano/src/bigdl/nano/pytorch/trainer/Trainer.py
@@ -120,7 +120,7 @@ def __init__(self, num_processes: int = 1,
                         " without avx512 will crash."
                         "Fall back to regular pytorch.")
                 self.use_ipex = False
-            elif dtype == torch.float16:
+            elif dtype == torch.bfloat16:
                 warning("Enable IPEX bfloat16 in a cpu instruction set"
                         " without avx512 will crash. "
                         "Using 32-bit precision")
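
End-to-end usage sketch for the API as it stands after PATCH 32/32. This is illustrative only and not part of the patch series: it assumes the usual `from bigdl.nano.pytorch import Trainer` entry point, an installed intel_extension_for_pytorch, and a CPU with AVX-512 support; the small random dataset below is a placeholder standing in for a real DataLoader.

    import torch
    from torch import nn
    from torch.utils.data import DataLoader, TensorDataset
    from torchvision.models.resnet import resnet18
    from bigdl.nano.pytorch import Trainer

    # placeholder data: 16 random 32x32 RGB images with 10 classes
    train_loader = DataLoader(
        TensorDataset(torch.randn(16, 3, 32, 32), torch.randint(0, 10, (16,))),
        batch_size=4)

    model = resnet18(num_classes=10)
    loss = nn.CrossEntropyLoss()
    # SGD appears in IPEX_FUSED_OPTIMIZER_LIST, so IPEXBF16Precision can take the fused step path
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

    # use_ipex=True together with precision='bf16' maps to
    # ipex.optimize(..., dtype=torch.bfloat16) plus the IPEXBF16Precision plugin from this series;
    # on CPUs without AVX-512 the Trainer logs a warning and falls back to 32-bit precision.
    trainer = Trainer(max_epochs=1, use_ipex=True, precision='bf16')
    pl_model = Trainer.compile(model, loss, optimizer)
    trainer.fit(pl_model, train_loader)

A TorchNano subclass takes the same keywords, e.g. MyNano(use_ipex=True, precision='bf16', num_processes=2, strategy="subprocess").train(), mirroring the cases added in test_torch_nano_ipex.py.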