From 77c8e27e603bea9a69e7647587ca8d509dc1990d Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Tue, 7 Jul 2020 01:54:39 +0700
Subject: [PATCH 1/3] Convert BatchNorm to SyncBatchNorm

---
 train.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/train.py b/train.py
index ce211f1f5322..9b5deee5d5aa 100644
--- a/train.py
+++ b/train.py
@@ -194,6 +194,7 @@ def train(hyp, tb_writer, opt, device):
     # DDP mode
     if device.type != 'cpu' and opt.local_rank != -1:
         # pip install torch==1.4.0+cu100 torchvision==0.5.0+cu100 -f https://download.pytorch.org/whl/torch_stable.html
+        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
         if mixed_precision:
             model = DDP(model, delay_allreduce=True)
         else:

From 2aa330139f3cc1237aeb3132245ed7e5d6da1683 Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Tue, 7 Jul 2020 12:07:40 +0700
Subject: [PATCH 2/3] Remove apex.parallel. Use torch.nn.parallel

For future compatibility
---
 train.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/train.py b/train.py
index 9b5deee5d5aa..7eaedf5ecdf0 100644
--- a/train.py
+++ b/train.py
@@ -7,6 +7,7 @@
 import torch.optim.lr_scheduler as lr_scheduler
 import torch.utils.data
 from torch.utils.tensorboard import SummaryWriter
+from torch.nn.parallel import DistributedDataParallel as DDP
 
 import test  # import test.py to get mAP after each epoch
 from models.yolo import Model
@@ -17,9 +18,7 @@
 mixed_precision = True
 try:  # Mixed precision training https://github.com/NVIDIA/apex
     from apex import amp
-    from apex.parallel import DistributedDataParallel as DDP
 except:
-    from torch.nn.parallel import DistributedDataParallel as DDP
     print('Apex recommended for faster mixed precision training: https://github.com/NVIDIA/apex')
     mixed_precision = False  # not installed
 
@@ -195,10 +194,7 @@ def train(hyp, tb_writer, opt, device):
     if device.type != 'cpu' and opt.local_rank != -1:
         # pip install torch==1.4.0+cu100 torchvision==0.5.0+cu100 -f https://download.pytorch.org/whl/torch_stable.html
         model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
-        if mixed_precision:
-            model = DDP(model, delay_allreduce=True)
-        else:
-            model = DDP(model, device_ids=[opt.local_rank])
+        model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank)
 
     # Model parameters
     hyp['cls'] *= nc / 80.  # scale coco-tuned hyp['cls'] to current dataset

From 050b2a5a79a89c9405854d439a1f70f892139b1c Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Tue, 7 Jul 2020 12:38:14 +0700
Subject: [PATCH 3/3] Add cleanup for process_group

---
 train.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/train.py b/train.py
index 7eaedf5ecdf0..27c83cd7e56a 100644
--- a/train.py
+++ b/train.py
@@ -399,6 +399,7 @@ def train(hyp, tb_writer, opt, device):
     if not opt.evolve:
         plot_results()  # save as results.png
         print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
 
+    dist.destroy_process_group() if device.type != 'cpu' and torch.cuda.device_count() > 1 else None
     torch.cuda.empty_cache()
     return results
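
As a quick reference for how the three changes fit together outside the diff context, below is a minimal, self-contained sketch of the pattern they converge on: initialize the process group, convert BatchNorm layers to SyncBatchNorm, wrap the model in torch.nn.parallel.DistributedDataParallel, and destroy the process group when training ends. This is not the repository's train.py: the placeholder model, the main() wrapper, and the ddp_mode flag are illustrative assumptions; only the torch.distributed / SyncBatchNorm / DDP calls mirror the patches.

# ddp_sketch.py -- illustrative only, not the repository's train.py
import argparse

import torch
import torch.distributed as dist
import torch.nn as nn
from torch.nn.parallel import DistributedDataParallel as DDP


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--local_rank', type=int, default=-1)  # set by torch.distributed.launch
    opt = parser.parse_args()

    ddp_mode = opt.local_rank != -1 and torch.cuda.is_available()
    if ddp_mode:
        torch.cuda.set_device(opt.local_rank)
        dist.init_process_group(backend='nccl', init_method='env://')  # one process per GPU
        device = torch.device('cuda', opt.local_rank)
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Placeholder model standing in for models.yolo.Model
    model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.BatchNorm2d(16), nn.ReLU()).to(device)

    if ddp_mode:
        # Patch 1: replace BatchNorm layers with SyncBatchNorm so batch statistics
        # are synchronized across all DDP processes.
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
        # Patch 2: wrap with the built-in DistributedDataParallel instead of
        # apex.parallel.DistributedDataParallel.
        model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank)

    # ... optimizer, dataloader with DistributedSampler, and the training loop go here ...

    # Patch 3: tear down the process group once training is finished.
    if ddp_mode:
        dist.destroy_process_group()
    torch.cuda.empty_cache()


if __name__ == '__main__':
    main()

With this layout, training is launched with one process per GPU, e.g. python -m torch.distributed.launch --nproc_per_node=2 train.py, which is what supplies --local_rank; single-GPU or CPU runs keep local_rank at -1 and skip the DDP path entirely.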