From 1f38f6977e66e8f7be77235af4d7fe1285b0027c Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Tue, 23 Jul 2024 15:23:29 +0800 Subject: [PATCH 1/2] Refine Pytorch 3x Mixed Precision Example Signed-off-by: zehao-intel --- .../pytorch/cv/mixed_precision/README.md | 3 +- .../cv/mixed_precision/run_benchmark.sh | 2 +- .../mixed_precision/resnet18/README.md | 47 --- .../mixed_precision/resnet18/main.py | 371 ------------------ .../mixed_precision/resnet18/requirements.txt | 4 - .../mixed_precision/resnet18/run_benchmark.sh | 82 ---- 6 files changed, 3 insertions(+), 506 deletions(-) delete mode 100644 examples/3.x_api/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/README.md delete mode 100644 examples/3.x_api/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/main.py delete mode 100644 examples/3.x_api/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/requirements.txt delete mode 100644 examples/3.x_api/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/run_benchmark.sh diff --git a/examples/3.x_api/pytorch/cv/mixed_precision/README.md b/examples/3.x_api/pytorch/cv/mixed_precision/README.md index 597c9e2a3fb..ede1837b57a 100644 --- a/examples/3.x_api/pytorch/cv/mixed_precision/README.md +++ b/examples/3.x_api/pytorch/cv/mixed_precision/README.md @@ -36,7 +36,8 @@ bash run_autotune.sh --input_model=resnet18 --dataset_location=/path/to/imagenet ## Benchmark ```Shell # run optimized performance -bash run_benchmark.sh --input_model=resnet18 --dataset_location=/path/to/imagenet --mode=performance --batch_size=100 --optimized=true --iters=500 +bash run_benchmark.sh --input_model=resnet18 --dataset_location=/path/to/imagenet --mode=performance --batch_size=20 --optimized=true --iters=500 + # run optimized accuracy bash run_benchmark.sh --input_model=resnet18 --dataset_location=/path/to/imagenet --mode=accuracy --batch_size=1 --optimized=true ``` diff --git 
a/examples/3.x_api/pytorch/cv/mixed_precision/run_benchmark.sh b/examples/3.x_api/pytorch/cv/mixed_precision/run_benchmark.sh index c34c587c250..28319cc4ffe 100644 --- a/examples/3.x_api/pytorch/cv/mixed_precision/run_benchmark.sh +++ b/examples/3.x_api/pytorch/cv/mixed_precision/run_benchmark.sh @@ -13,7 +13,7 @@ function main { function init_params { iters=100 tuned_checkpoint=saved_results - batch_size=30 + batch_size=20 for var in "$@" do case $var in diff --git a/examples/3.x_api/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/README.md b/examples/3.x_api/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/README.md deleted file mode 100644 index 6d92b2dd172..00000000000 --- a/examples/3.x_api/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/README.md +++ /dev/null @@ -1,47 +0,0 @@ -Step-by-Step -============ - -This document describes the step-by-step instructions for reproducing PyTorch ResNet18 MixedPrecision results with Intel® Neural Compressor. - -# Prerequisite - -### 1. Environment - -PyTorch 1.8 or higher version is needed with pytorch_fx backend. - -```Shell -cd examples/3.x_api/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18 -pip install -r requirements.txt -``` -> Note: Validated PyTorch [Version](/docs/source/installation_guide.md#validated-software-environment). - -### 2. Prepare Dataset - -Download [ImageNet](http://www.image-net.org/) Raw image to dir: /path/to/imagenet. The dir includes below folder: - -```bash -ls /path/to/imagenet -train val -``` - -# Run - -> Note: All torchvision model names can be passed as long as they are included in `torchvision.models`, below are some examples. 
- -## MixedPrecision -```Shell -python main.py -t -a resnet18 --pretrained /path/to/imagenet -``` - -## Benchmark -```Shell -# run optimized performance, the CPU core 0~3 will be used -bash run_benchmark.sh --input_model=resnet18 --dataset_location=/path/to/imagenet --mode=performance --batch_size=1 --optimized=true --iters=500 -# run optimized accuracy -bash run_benchmark.sh --input_model=resnet18 --dataset_location=/path/to/imagenet --mode=accuracy --batch_size=100 --optimized=true -``` - - - - - diff --git a/examples/3.x_api/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/main.py b/examples/3.x_api/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/main.py deleted file mode 100644 index 63758056c72..00000000000 --- a/examples/3.x_api/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/main.py +++ /dev/null @@ -1,371 +0,0 @@ -import argparse -import os -import random -import shutil -import time -import warnings -import sys - -import torch -import torch.nn as nn -import torch.nn.parallel -import torch.distributed as dist -import torch.optim -import torch.multiprocessing as mp -import torch.utils.data -import torch.utils.data.distributed -import torchvision.transforms as transforms -import torchvision.datasets as datasets -import torchvision.models as models - -model_names = models.list_models(module=models) - -parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') -parser.add_argument('data', metavar='DIR', - help='path to dataset') -parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18', - choices=model_names, - help='model architecture: ' + - ' | '.join(model_names) + - ' (default: resnet18)') -parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', - help='number of data loading workers (default: 4)') -parser.add_argument('--epochs', default=90, type=int, metavar='N', - help='number of total epochs to run') -parser.add_argument('--start-epoch', 
default=0, type=int, metavar='N', - help='manual epoch number (useful on restarts)') -parser.add_argument('-b', '--batch-size', default=256, type=int, - metavar='N', - help='mini-batch size (default: 256), this is the total ' - 'batch size of all GPUs on the current node when ' - 'using Data Parallel or Distributed Data Parallel') -parser.add_argument('--lr', '--learning-rate', default=0.1, type=float, - metavar='LR', help='initial learning rate', dest='lr') -parser.add_argument('--momentum', default=0.9, type=float, metavar='M', - help='momentum') -parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float, - metavar='W', help='weight decay (default: 1e-4)', - dest='weight_decay') -parser.add_argument('-p', '--print-freq', default=10, type=int, - metavar='N', help='print frequency (default: 10)') -parser.add_argument('--resume', default='', type=str, metavar='PATH', - help='path to latest checkpoint (default: none)') -parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', - help='evaluate model on validation set') -parser.add_argument('-t', '--tune', dest='tune', action='store_true', - help='tune best optimized model') -parser.add_argument('--pretrained', dest='pretrained', action='store_true', - help='use pre-trained model') -parser.add_argument('--world-size', default=-1, type=int, - help='number of nodes for distributed training') -parser.add_argument('--rank', default=-1, type=int, - help='node rank for distributed training') -parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, - help='url used to set up distributed training') -parser.add_argument('--dist-backend', default='nccl', type=str, - help='distributed backend') -parser.add_argument('--seed', default=None, type=int, - help='seed for initializing training. 
') -parser.add_argument('--gpu', default=None, type=int, - help='GPU id to use.') -parser.add_argument('--ppn', default=1, type=int, - help='number of processes on each node of distributed training') -parser.add_argument('--multiprocessing-distributed', action='store_true', - help='Use multi-processing distributed training to launch ' - 'N processes per node, which has N GPUs. This is the ' - 'fastest way to use PyTorch for either single node or ' - 'multi node data parallel training') -parser.add_argument('-i', "--iter", default=0, type=int, - help='For accuracy measurement only.') -parser.add_argument('-w', "--warmup_iter", default=5, type=int, - help='For benchmark measurement only.') -parser.add_argument('--performance', dest='performance', action='store_true', - help='run benchmark') -parser.add_argument('-r', "--accuracy", dest='accuracy', action='store_true', - help='For accuracy measurement only.') -parser.add_argument("--tuned_checkpoint", default='./saved_results', type=str, metavar='PATH', - help='path to checkpoint tuned by Neural Compressor (default: ./)') -parser.add_argument('--optimized', dest='optimized', action='store_true', - help='run benchmark') - -best_acc1 = 0 - - -def main(): - args = parser.parse_args() - - if 'mobilenet_v2' in args.arch: - import torchvision.models.quantization as models - else: - import torchvision.models as models - - if args.seed is not None: - random.seed(args.seed) - torch.manual_seed(args.seed) - - if args.pretrained: - print("=> using pre-trained model '{}'".format(args.arch)) - model = models.__dict__[args.arch](pretrained=True) - else: - print("=> creating model '{}'".format(args.arch)) - model = models.__dict__[args.arch]() - - # define loss function (criterion) and optimizer - criterion = nn.CrossEntropyLoss() - - optimizer = torch.optim.SGD(model.parameters(), args.lr, - momentum=args.momentum, - weight_decay=args.weight_decay) - - # optionally resume from a checkpoint - if args.resume: - if 
os.path.isfile(args.resume): - print("=> loading checkpoint '{}'".format(args.resume)) - checkpoint = torch.load(args.resume) - args.start_epoch = checkpoint['epoch'] - best_acc1 = checkpoint['best_acc1'] - if args.gpu is not None: - # best_acc1 may be from a checkpoint from a different GPU - best_acc1 = best_acc1.to(args.gpu) - model.load_state_dict(checkpoint['state_dict']) - optimizer.load_state_dict(checkpoint['optimizer']) - print("=> loaded checkpoint '{}' (epoch {})" - .format(args.resume, checkpoint['epoch'])) - else: - print("=> no checkpoint found at '{}'".format(args.resume)) - - # Data loading code - traindir = os.path.join(args.data, 'train') - valdir = os.path.join(args.data, 'val') - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) - - train_dataset = datasets.ImageFolder( - traindir, - transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ])) - - train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=args.batch_size, shuffle=True, - num_workers=args.workers, pin_memory=True, sampler=None) - - val_dataset = datasets.ImageFolder(valdir, transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize, - ])) - - val_loader = torch.utils.data.DataLoader( - val_dataset, - batch_size=args.batch_size, shuffle=False, - num_workers=args.workers, pin_memory=True) - - if args.evaluate: - validate(val_loader, model, criterion, args) - return - - def eval_func(model): - accu = validate(val_loader, model, criterion, args) - return float(accu) - - if args.tune: - from neural_compressor.torch.quantization import MixPrecisionConfig, TuningConfig, autotune - custom_tune_config = TuningConfig(config_set=[MixPrecisionConfig(dtype=["fp16", "fp32"])], max_trials=1) - best_model = autotune(model=model, tune_config=custom_tune_config, eval_fn=eval_func) - save_dict = { - 'state_dict': 
best_model.state_dict(), - 'optimizer': optimizer.state_dict(), - } - torch.save(save_dict, os.path.join(args.tuned_checkpoint, "best_model.pt")) - return - - if args.performance or args.accuracy: - model.eval() - if args.optimized: - checkpoint = torch.load(args.tuned_checkpoint) - model.load_state_dict(checkpoint['state_dict']) - optimizer.load_state_dict(checkpoint['optimizer']) - else: - new_model = model - if args.performance or args.accuracy: - validate(val_loader, new_model, criterion, args) - return - - -def train(train_loader, model, criterion, optimizer, epoch, args): - batch_time = AverageMeter('Time', ':6.3f') - data_time = AverageMeter('Data', ':6.3f') - losses = AverageMeter('Loss', ':.4e') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - progress = ProgressMeter(len(train_loader), batch_time, data_time, losses, top1, - top5, prefix="Epoch: [{}]".format(epoch)) - - # switch to train mode - model.train() - - end = time.time() - for i, (input, target) in enumerate(train_loader): - # measure data loading time - data_time.update(time.time() - end) - - if args.gpu is not None: - input = input.cuda(args.gpu, non_blocking=True) - target = target.cuda(args.gpu, non_blocking=True) - - # compute output - output = model(input) - loss = criterion(output, target) - - # measure accuracy and record loss - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), input.size(0)) - top1.update(acc1[0], input.size(0)) - top5.update(acc5[0], input.size(0)) - - # compute gradient and do SGD step - optimizer.zero_grad() - loss.backward() - optimizer.step() - - # measure elapsed time - batch_time.update(time.time() - end) - end = time.time() - - if i % args.print_freq == 0: - progress.print(i) - - -def validate(val_loader, model, criterion, args): - batch_time = AverageMeter('Time', ':6.3f') - losses = AverageMeter('Loss', ':.4e') - top1 = AverageMeter('Acc@1', ':6.2f') - top5 = AverageMeter('Acc@5', ':6.2f') - progress 
= ProgressMeter(len(val_loader), batch_time, losses, top1, top5, - prefix='Test: ') - - # switch to evaluate mode - model.eval() - - with torch.no_grad(): - latency_list = [] - for i, (input, target) in enumerate(val_loader): - if i >= args.warmup_iter: - start = time.time() - if args.gpu is not None: - input = input.cuda(args.gpu, non_blocking=True) - target = target.cuda(args.gpu, non_blocking=True) - - # compute output - perf_start = time.time() - output = model(input) - perf_end = time.time() - latency_list.append(perf_end-perf_start) - loss = criterion(output, target) - - # measure accuracy and record loss - acc1, acc5 = accuracy(output, target, topk=(1, 5)) - losses.update(loss.item(), input.size(0)) - top1.update(acc1[0], input.size(0)) - top5.update(acc5[0], input.size(0)) - - # measure elapsed time - if i >= args.warmup_iter: - batch_time.update(time.time() - start) - - if i % args.print_freq == 0: - progress.print(i) - - if args.iter > 0 and i >= (args.warmup_iter + args.iter - 1): - break - - if args.accuracy: - print('Batch size = %d' % args.batch_size) - print('Accuracy: {top1:.5f} Accuracy@5 {top5:.5f}' - .format(top1=(top1.avg / 100), top5=(top5.avg / 100))) - if args.performance: - latency = np.array(latency_list[args.warmup_iter:]).mean() / args.batch_size - print("Batch size = {}".format(args.batch_size)) - print("Latency: {:.3f} ms".format(latency * 1000)) - print("Throughput: {:.3f} images/sec".format(1. 
/ latency)) - - return top1.avg - - -def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'): - torch.save(state, filename) - if is_best: - shutil.copyfile(filename, 'model_best.pth.tar') - -class AverageMeter(object): - """Computes and stores the average and current value""" - def __init__(self, name, fmt=':f'): - self.name = name - self.fmt = fmt - self.reset() - - def reset(self): - self.val = 0 - self.avg = 0 - self.sum = 0 - self.count = 0 - - def update(self, val, n=1): - self.val = val - self.sum += val * n - self.count += n - self.avg = self.sum / self.count - - def __str__(self): - fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' - return fmtstr.format(**self.__dict__) - - -class ProgressMeter(object): - def __init__(self, num_batches, *meters, prefix=""): - self.batch_fmtstr = self._get_batch_fmtstr(num_batches) - self.meters = meters - self.prefix = prefix - - def print(self, batch): - entries = [self.prefix + self.batch_fmtstr.format(batch)] - entries += [str(meter) for meter in self.meters] - print('\t'.join(entries)) - - def _get_batch_fmtstr(self, num_batches): - num_digits = len(str(num_batches // 1)) - fmt = '{:' + str(num_digits) + 'd}' - return '[' + fmt + '/' + fmt.format(num_batches) + ']' - - -def adjust_learning_rate(optimizer, epoch, args): - """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" - lr = args.lr * (0.1 ** (epoch // 30)) - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - -def accuracy(output, target, topk=(1,)): - """Computes the accuracy over the k top predictions for the specified values of k""" - with torch.no_grad(): - maxk = max(topk) - batch_size = target.size(0) - - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() - correct = pred.eq(target.view(1, -1).expand_as(pred)) - - res = [] - for k in topk: - correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) - res.append(correct_k.mul_(100.0 / batch_size)) - return res - - -if 
__name__ == '__main__': - main() diff --git a/examples/3.x_api/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/requirements.txt b/examples/3.x_api/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/requirements.txt deleted file mode 100644 index 46233c08f4a..00000000000 --- a/examples/3.x_api/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -neural-compressor -torch>=1.9.0 -torchvision>=0.10.0 -accelerate diff --git a/examples/3.x_api/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/run_benchmark.sh b/examples/3.x_api/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/run_benchmark.sh deleted file mode 100644 index c4bf04288f8..00000000000 --- a/examples/3.x_api/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/run_benchmark.sh +++ /dev/null @@ -1,82 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - iters=100 - tuned_checkpoint=saved_results - batch_size=30 - for var in "$@" - do - case $var in - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - ;; - --iters=*) - iters=$(echo ${var} |cut -f2 -d=) - ;; - --optimized=*) - optimized=$(echo ${var} |cut -f2 -d=) - ;; - *) - echo "Error: No such parameter: ${var}" - exit 1 - ;; - esac - done - -} - - -# run_benchmark -function run_benchmark { - if [[ ${mode} == "accuracy" ]]; then - mode_cmd=" --accuracy" - elif [[ ${mode} == "performance" ]]; then - mode_cmd=" --iter ${iters} --performance " - else - echo "Error: No such mode: ${mode}" - exit 1 - fi - - if [[ ${optimized} == "true" ]]; then - extra_cmd="--optimized ${dataset_location}" - else - extra_cmd="${dataset_location}" - fi 
- - if [[ ${mode} == "accuracy" ]]; then - python main.py \ - --pretrained \ - --tuned_checkpoint ${tuned_checkpoint} \ - -b ${batch_size} \ - -a ${input_model} \ - ${mode_cmd} \ - ${extra_cmd} - elif [[ ${mode} == "performance" ]]; then - numactl -m 0 -C 0-3 python main.py \ - --pretrained \ - --tuned_checkpoint ${tuned_checkpoint} \ - -b ${batch_size} \ - -a ${input_model} \ - ${mode_cmd} \ - ${extra_cmd} -} - -main "$@" From 94111cb50350ac2bee05361caaf57698dfab9bf9 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Tue, 23 Jul 2024 15:48:16 +0800 Subject: [PATCH 2/2] modify config bs Signed-off-by: zehao-intel --- examples/.config/model_params_pytorch_3x.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/.config/model_params_pytorch_3x.json b/examples/.config/model_params_pytorch_3x.json index 15cd28907fe..e38749e2ef6 100644 --- a/examples/.config/model_params_pytorch_3x.json +++ b/examples/.config/model_params_pytorch_3x.json @@ -152,7 +152,7 @@ "dataset_location": "/tf_dataset/pytorch/ImageNet/raw", "input_model": "resnet18", "main_script": "main.py", - "batch_size": 100 + "batch_size": 20 } } }