microsoft · dpower4 · Nov 18, 2024 · Nov 18, 2024 · Nov 18, 2024 · Nov 18, 2024
@@ -7,12 +7,12 @@ trigger:
 
 strategy:
   matrix:
-    python-3.6:
-      imageTag: '3.6'
     python-3.7:
       imageTag: '3.7'
     python-3.8:
       imageTag: '3.8'
+    python-3.10:
+      imageTag: '3.10'
     # TODO
     #python-latest:
     #  imageTag: '3'

@@ -10,7 +10,7 @@ pool:
   vmImage: ubuntu-latest
 
 container:
-  image: nvcr.io/nvidia/pytorch:20.12-py3
+  image: nvcr.io/nvidia/pytorch:24.03-py3
   options: '-v /var/run/docker.sock:/var/run/docker.sock -v /usr/bin/docker:/usr/bin/docker -v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/:/usr/lib/sudo/'
 
 steps:

@@ -14,7 +14,6 @@ coverage:
         target: 80%
         threshold: 1%
         flags:
-          - cpu-python3.6-unit-test
           - cpu-python3.7-unit-test
           - cuda-unit-test
           - directx-unit-test
@@ -23,7 +22,6 @@ coverage:
         target: 80%
         threshold: 1%
         flags:
-          - cpu-python3.6-unit-test
           - cpu-python3.7-unit-test
           - cuda-unit-test
           - directx-unit-test
@@ -26,7 +26,7 @@ Here're the system requirements for control node.
 ### Requirements
 
 * A recent version of Linux; Ubuntu 18.04 or later is highly recommended.
-* [Python](https://www.python.org/) version 3.6 or later (which can be checked by running `python3 --version`).
+* [Python](https://www.python.org/) version 3.7 or later (which can be checked by running `python3 --version`).
 * [Pip](https://pip.pypa.io/en/stable/installing/) version 18.0 or later (which can be checked by running `python3 -m pip --version`).
 
 :::note

@@ -328,7 +328,8 @@ A list of models to run, only supported in model-benchmark.
     shufflenet_v2_x0_5 | shufflenet_v2_x1_0 | shufflenet_v2_x1_5 | shufflenet_v2_x2_0 |
     squeezenet1_0 | squeezenet1_1 |
     vgg11 | vgg11_bn | vgg13 | vgg13_bn | vgg16 | vgg16_bn | vgg19_bn | vgg19 |
-    bert-base | bert-large | gpt2-small | gpt2-medium | gpt2-large | gpt2-xl ]
+    bert-base | bert-large | gpt2-small | gpt2-medium | gpt2-large | gpt2-xl |
+    llama2-7b | llama2-13b | llama2-70b ]
   ```
 * default value: `[ ]`
 

@@ -13,6 +13,7 @@ id: model-benchmarks
 Run training or inference tasks with single or half precision for deep learning models,
 including the following categories:
 * GPT: gpt2-small, gpt2-medium, gpt2-large and gpt2-xl
+* LLAMA: llama2-7b, llama2-13b and llama2-70b
 * BERT: bert-base and bert-large
 * LSTM
 * CNN, listed in [`torchvision.models`](https://pytorch.org/vision/0.8/models.html), including:

@@ -0,0 +1,41 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""Model benchmark example for Llama2-7b (32-layer, 4096-hidden, 32-heads, 7B parameters).
+
+Commands to run:
+  python3 examples/benchmarks/pytorch_llama2.py (Single GPU)
+  python3 -m torch.distributed.launch --use_env --nproc_per_node=8 examples/benchmarks/pytorch_llama2.py \
+      --distributed (Distributed)
+"""
+
+import argparse
+
+from superbench.benchmarks import Platform, Framework, BenchmarkRegistry
+from superbench.common.utils import logger
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--distributed', action='store_true', default=False, help='Whether to enable distributed training.'
+    )
+    args = parser.parse_args()
+
+    # Benchmark llama2-7b with batch size 1, sequence length 512 and float16 precision for a duration of 120.
+    model_name = 'llama2-7b'
+    parameters = '--batch_size 1 --duration 120 --seq_len 512 --precision float16'
+    if args.distributed:    # only when --distributed is passed: append the ddp implementation and nccl backend
+        parameters += ' --distributed_impl ddp --distributed_backend nccl'
+
+    # Build the benchmark context for the PyTorch framework on the CUDA platform, then launch the benchmark.
+    context = BenchmarkRegistry.create_benchmark_context(
+        model_name, platform=Platform.CUDA, parameters=parameters, framework=Framework.PYTORCH
+    )
+
+    benchmark = BenchmarkRegistry.launch_benchmark(context)
+    if benchmark:    # log name, return code and result only when launch_benchmark returned a truthy object
+        logger.info(
+            'benchmark: {}, return code: {}, result: {}'.format(
+                benchmark.name, benchmark.return_code, benchmark.result
+            )
+        )
@@ -131,17 +131,17 @@ def run(self):
         'Operating System :: POSIX',
         'Programming Language :: Python :: 3',
         'Programming Language :: Python :: 3 :: Only',
-        'Programming Language :: Python :: 3.6',
         'Programming Language :: Python :: 3.7',
         'Programming Language :: Python :: 3.8',
         'Programming Language :: Python :: 3.9',
+        'Programming Language :: Python :: 3.10',
         'Topic :: System :: Benchmark',
         'Topic :: System :: Clustering',
         'Topic :: System :: Hardware',
     ],
     keywords='benchmark, AI systems',
     packages=find_packages(exclude=['tests']),
-    python_requires='>=3.6, <4',
+    python_requires='>=3.7, <4',
     use_scm_version={
         'local_scheme': 'node-and-date',
         'version_scheme': lambda _: superbench.__version__,
@@ -211,7 +211,7 @@ def run(self):
             'torch': [
                 'torch>=1.7.0a0',
                 'torchvision>=0.8.0a0',
-                'transformers>=4.3.3, <4.23.0',
+                'transformers>=4.28.0',
             ],
             'ort': [
                 'onnx>=1.10.2',

@@ -138,7 +138,7 @@ def export_torchvision_model(self, model_name, batch_size=1):
             model,
             dummy_input,
             file_name,
-            opset_version=10,
+            opset_version=14,
             operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK,
             input_names=['input'],
             output_names=['output'],
@@ -179,7 +179,7 @@ def export_benchmark_model(self, model_name, batch_size=1, seq_length=512):
             model,
             dummy_input,
             file_name,
-            opset_version=10,
+            opset_version=14,
             do_constant_folding=True,
             input_names=['input'],
             output_names=['output'],

@@ -10,4 +10,4 @@
 from superbench.benchmarks.model_benchmarks.pytorch_lstm import PytorchLSTM
 from superbench.benchmarks.model_benchmarks.megatron_gpt3 import MegatronGPT
 
-__all__ = ['ModelBenchmark', 'PytorchBERT', 'PytorchGPT2', 'PytorchCNN', 'PytorchLSTM', 'MegatronGPT']
+__all__ = ['ModelBenchmark', 'PytorchBERT', 'PytorchGPT2', 'PytorchCNN', 'PytorchLSTM', 'MegatronGPT', 'PytorchLlama']