diff --git a/python/nano/tutorial/training/pytorch/.keep b/python/nano/tutorial/training/pytorch/.keep
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/python/nano/tutorial/training/pytorch/pytorch_cv_data_pipeline.py b/python/nano/tutorial/training/pytorch/pytorch_cv_data_pipeline.py
new file mode 100644
index 00000000000..b6182db76ed
--- /dev/null
+++ b/python/nano/tutorial/training/pytorch/pytorch_cv_data_pipeline.py
@@ -0,0 +1,114 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from torch import nn
+from torch.utils.data import DataLoader
+from torchvision.models import resnet18
+from torchmetrics import Accuracy
+
+from bigdl.nano.pytorch.torch_nano import TorchNano
+
+
+class MyPytorchModule(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.model = resnet18(pretrained=True)
+        num_ftrs = self.model.fc.in_features
+        # Here the size of each output sample is set to 37.
+        self.model.fc = nn.Linear(num_ftrs, 37)
+
+    def forward(self, x):
+        return self.model(x)
+
+
+def create_dataloaders():
+    # CV Data Pipelines
+    #
+    # Computer vision tasks often need a data processing pipeline that sometimes
+    # constitutes a non-trivial part of the whole training pipeline.
+    # BigDL-Nano can accelerate such data pipelines
+    # by providing drop-in replacements of torchvision's datasets and transforms.
+    # Simply import them from bigdl.nano.pytorch.vision instead of torchvision,
+    # as shown below.
+    #
+    from bigdl.nano.pytorch.vision import transforms
+    from bigdl.nano.pytorch.vision.datasets import OxfordIIITPet
+    train_transform = transforms.Compose([transforms.Resize(256),
+                                          transforms.RandomCrop(224),
+                                          transforms.RandomHorizontalFlip(),
+                                          transforms.ColorJitter(brightness=.5, hue=.3),
+                                          transforms.ToTensor(),
+                                          transforms.Normalize([0.485, 0.456, 0.406],
+                                                               [0.229, 0.224, 0.225])])
+    val_transform = transforms.Compose([transforms.Resize(256),
+                                        transforms.CenterCrop(224),
+                                        transforms.ToTensor(),
+                                        transforms.Normalize([0.485, 0.456, 0.406],
+                                                             [0.229, 0.224, 0.225])])
+
+    # Apply data augmentation to the train_dataset
+    train_dataset = OxfordIIITPet(root="/tmp/data", transform=train_transform, download=True)
+    val_dataset = OxfordIIITPet(root="/tmp/data", transform=val_transform)
+
+    # split the dataset into training and validation subsets
+    indices = torch.randperm(len(train_dataset))
+    val_size = len(train_dataset) // 4
+    train_dataset = torch.utils.data.Subset(train_dataset, indices[:-val_size])
+    val_dataset = torch.utils.data.Subset(val_dataset, indices[-val_size:])
+
+    # prepare data loaders
+    train_dataloader = DataLoader(train_dataset, batch_size=32)
+    val_dataloader = DataLoader(val_dataset, batch_size=32)
+
+    return train_dataloader, val_dataloader
+
+
+# subclass TorchNano and override its train() method
+class MyNano(TorchNano):
+    # move the body of your existing train function into the TorchNano train method
+    def train(self):
+        model = MyPytorchModule()
+        optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
+        loss_func = torch.nn.CrossEntropyLoss()
+
+        train_loader, val_loader = create_dataloaders()
+
+        # call `setup` to prepare the model, optimizer(s) and dataloader(s) for accelerated training
+        model, optimizer, (train_loader, val_loader) = self.setup(model, optimizer,
+                                                                  train_loader, val_loader)
+        num_epochs = 5
+
+        # EPOCH LOOP
+        for epoch in range(num_epochs):
+
+            # TRAINING LOOP
+            model.train()
+            train_loss, num = 0, 0
+            for data, target in train_loader:
+                optimizer.zero_grad()
+                output = model(data)
+                loss = loss_func(output, target)
+                # replace loss.backward() with self.backward(loss)
+                self.backward(loss)
+                optimizer.step()
+
+                train_loss += loss.sum()
+                num += 1
+            print(f'Train Epoch: {epoch}, avg_loss: {train_loss / num}')
+
+
+if __name__ == '__main__':
+    MyNano().train()
diff --git a/python/nano/tutorial/training/pytorch/pytorch_train_ipex.py b/python/nano/tutorial/training/pytorch/pytorch_train_ipex.py
new file mode 100644
index 00000000000..d0127fb20e3
--- /dev/null
+++ b/python/nano/tutorial/training/pytorch/pytorch_train_ipex.py
@@ -0,0 +1,113 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from torch import nn
+from torch.utils.data import DataLoader
+from torchvision import transforms
+from torchvision.models import resnet18
+from torchvision.datasets import OxfordIIITPet
+from torchmetrics import Accuracy
+
+from bigdl.nano.pytorch.torch_nano import TorchNano
+
+
+class MyPytorchModule(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.model = resnet18(pretrained=True)
+        num_ftrs = self.model.fc.in_features
+        # Here the size of each output sample is set to 37.
+        self.model.fc = nn.Linear(num_ftrs, 37)
+
+    def forward(self, x):
+        return self.model(x)
+
+
+def create_dataloaders():
+    train_transform = transforms.Compose([transforms.Resize(256),
+                                          transforms.RandomCrop(224),
+                                          transforms.RandomHorizontalFlip(),
+                                          transforms.ColorJitter(brightness=.5, hue=.3),
+                                          transforms.ToTensor(),
+                                          transforms.Normalize([0.485, 0.456, 0.406],
+                                                               [0.229, 0.224, 0.225])])
+    val_transform = transforms.Compose([transforms.Resize(256),
+                                        transforms.CenterCrop(224),
+                                        transforms.ToTensor(),
+                                        transforms.Normalize([0.485, 0.456, 0.406],
+                                                             [0.229, 0.224, 0.225])])
+
+    # Apply data augmentation to the train_dataset
+    train_dataset = OxfordIIITPet(root="/tmp/data", transform=train_transform, download=True)
+    val_dataset = OxfordIIITPet(root="/tmp/data", transform=val_transform)
+
+    # split the dataset into training and validation subsets
+    indices = torch.randperm(len(train_dataset))
+    val_size = len(train_dataset) // 4
+    train_dataset = torch.utils.data.Subset(train_dataset, indices[:-val_size])
+    val_dataset = torch.utils.data.Subset(val_dataset, indices[-val_size:])
+
+    # prepare data loaders
+    train_dataloader = DataLoader(train_dataset, batch_size=32)
+    val_dataloader = DataLoader(val_dataset, batch_size=32)
+
+    return train_dataloader, val_dataloader
+
+
+# subclass TorchNano and override its train() method
+class MyNano(TorchNano):
+    # move the body of your existing train function into the TorchNano train method
+    def train(self):
+        model = MyPytorchModule()
+        optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
+        loss_func = torch.nn.CrossEntropyLoss()
+        train_loader, val_loader = create_dataloaders()
+
+        # call `setup` to prepare the model, optimizer(s) and dataloader(s) for accelerated training
+        model, optimizer, (train_loader, val_loader) = self.setup(model, optimizer,
+                                                                  train_loader, val_loader)
+        num_epochs = 5
+
+        # EPOCH LOOP
+        for epoch in range(num_epochs):
+
+            # TRAINING LOOP
+            model.train()
+            train_loss, num = 0, 0
+            for data, target in train_loader:
+                optimizer.zero_grad()
+                output = model(data)
+                loss = loss_func(output, target)
+                # replace loss.backward() with self.backward(loss)
+                self.backward(loss)
+                optimizer.step()
+
+                train_loss += loss.sum()
+                num += 1
+            print(f'Train Epoch: {epoch}, avg_loss: {train_loss / num}')
+
+
+if __name__ == '__main__':
+    # IPEX Accelerated Training
+    #
+    # Intel Extension for PyTorch (a.k.a. IPEX) encapsulates
+    # several optimizations for PyTorch and offers an extra
+    # performance boost on Intel hardware.
+    #
+    # In BigDL-Nano, you can easily use IPEX to accelerate custom PyTorch training loops
+    # through TorchNano by setting use_ipex=True.
+    #
+    MyNano(use_ipex=True).train()
diff --git a/python/nano/tutorial/training/pytorch/pytorch_train_multi_instance.py b/python/nano/tutorial/training/pytorch/pytorch_train_multi_instance.py
new file mode 100644
index 00000000000..dc568c34535
--- /dev/null
+++ b/python/nano/tutorial/training/pytorch/pytorch_train_multi_instance.py
@@ -0,0 +1,129 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from torch import nn
+from torch.utils.data import DataLoader
+import torch.nn.functional as F
+from torchvision import transforms
+from torchvision.models import resnet18
+from torchvision.datasets import OxfordIIITPet
+from torchmetrics import Accuracy
+
+from pytorch_lightning import seed_everything
+from bigdl.nano.pytorch.torch_nano import TorchNano
+
+
+class MyPytorchModule(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.model = resnet18(pretrained=True)
+        num_ftrs = self.model.fc.in_features
+        # Here the size of each output sample is set to 37.
+        self.model.fc = nn.Linear(num_ftrs, 37)
+
+    def forward(self, x):
+        return self.model(x)
+
+
+def create_dataloaders():
+    train_transform = transforms.Compose([transforms.Resize(256),
+                                          transforms.RandomCrop(224),
+                                          transforms.RandomHorizontalFlip(),
+                                          transforms.ColorJitter(brightness=.5, hue=.3),
+                                          transforms.ToTensor(),
+                                          transforms.Normalize([0.485, 0.456, 0.406],
+                                                               [0.229, 0.224, 0.225])])
+    val_transform = transforms.Compose([transforms.Resize(256),
+                                        transforms.CenterCrop(224),
+                                        transforms.ToTensor(),
+                                        transforms.Normalize([0.485, 0.456, 0.406],
+                                                             [0.229, 0.224, 0.225])])
+
+    # Apply data augmentation to the train_dataset
+    train_dataset = OxfordIIITPet(root="/tmp/data", transform=train_transform, download=True)
+    val_dataset = OxfordIIITPet(root="/tmp/data", transform=val_transform)
+
+    # split the dataset into training and validation subsets
+    indices = torch.randperm(len(train_dataset))
+    val_size = len(train_dataset) // 4
+    train_dataset = torch.utils.data.Subset(train_dataset, indices[:-val_size])
+    val_dataset = torch.utils.data.Subset(val_dataset, indices[-val_size:])
+
+    # prepare data loaders
+    train_dataloader = DataLoader(train_dataset, batch_size=32)
+    val_dataloader = DataLoader(val_dataset, batch_size=32)
+
+    return train_dataloader, val_dataloader
+
+
+# subclass TorchNano and override its train() method
+class MyNano(TorchNano):
+    # move the body of your existing train function into the TorchNano train method
+    def train(self):
+        seed_everything(42)
+        model = MyPytorchModule()
+        optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
+        loss_func = torch.nn.CrossEntropyLoss()
+        train_loader, val_loader = create_dataloaders()
+
+        # call `setup` to prepare the model, optimizer(s) and dataloader(s) for accelerated training
+        model, optimizer, (train_loader, val_loader) = self.setup(model, optimizer,
+                                                                  train_loader, val_loader)
+        num_epochs = 5
+
+        # EPOCH LOOP
+        for epoch in range(num_epochs):
+
+            # TRAINING LOOP
+            model.train()
+            train_loss, num = 0, 0
+            for data, target in train_loader:
+                optimizer.zero_grad()
+                output = model(data)
+                loss = loss_func(output, target)
+                # replace loss.backward() with self.backward(loss)
+                self.backward(loss)
+                optimizer.step()
+
+                train_loss += loss.sum()
+                num += 1
+            print(f'Train Epoch: {epoch}, avg_loss: {train_loss / num}')
+
+
+if __name__ == '__main__':
+    # Multi-instance Training
+    #
+    # It is often beneficial to use multiple instances
+    # for training if a server contains multiple sockets or
+    # many cores, so that the workload can make full use of
+    # all CPU cores.
+    #
+    # With data-parallel training, the effective batch size becomes
+    # n times larger, where n is the number of parallel processes.
+    # We should scale the learning rate by n as well to achieve the
+    # same effect as single-instance training.
+    # However, scaling the learning rate linearly may lead to poor convergence
+    # at the beginning of training, so we should gradually increase the
+    # learning rate to n times its base value; this is called 'learning rate warmup'.
+    #
+    # Fortunately, BigDL-Nano makes it very easy to conduct multi-instance
+    # training correctly. It handles all of this for you.
+    #
+    # In BigDL-Nano, you can simply set num_processes in
+    # TorchNano to enable multi-instance training. In addition, it will automatically
+    # apply learning rate scaling and warmup for your training.
+    #
+    MyNano(num_processes=2).train()
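
The 'learning rate warmup' mentioned in the comments above amounts to gradually ramping the learning rate from its single-process value up to n times that value over the first few epochs. TorchNano applies this scaling and warmup automatically when num_processes > 1; the snippet below is only a minimal sketch of the idea, assuming a linear warmup and hypothetical base_lr/warmup_epochs values, not BigDL-Nano's actual implementation:

    # Illustrative sketch of linear learning rate warmup for data-parallel training.
    # base_lr, warmup_epochs and the schedule shape are assumptions for illustration only.
    def warmup_lr(epoch, base_lr=0.01, n=2, warmup_epochs=3):
        if epoch < warmup_epochs:
            # ramp linearly from base_lr towards n * base_lr during the warmup epochs
            return base_lr * (1 + (n - 1) * (epoch + 1) / warmup_epochs)
        # after warmup, keep the fully scaled learning rate
        return base_lr * n

    # usage inside an epoch loop:
    # for param_group in optimizer.param_groups:
    #     param_group['lr'] = warmup_lr(epoch)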