From 0dab1dd97709096e8129f8a08115ee83f64f2194 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Mon, 17 Jun 2024 15:13:44 -0700 Subject: [PATCH 01/31] Fix on numpy 2.0 upgrade --- requirements.txt | 4 +--- .../{models => canary_models}/DALLE2_pytorch/__init__.py | 0 .../DALLE2_pytorch/dalle2_pytorch.patch | 0 .../{models => canary_models}/DALLE2_pytorch/install.py | 0 .../{models => canary_models}/DALLE2_pytorch/metadata.yaml | 0 .../{models => canary_models}/DALLE2_pytorch/origin | 0 .../{models => canary_models}/DALLE2_pytorch/requirements.txt | 0 torchbenchmark/models/doctr_det_predictor/requirements.txt | 2 +- torchbenchmark/models/doctr_reco_predictor/requirements.txt | 4 ++-- torchbenchmark/models/timm_efficientdet/requirements.txt | 4 ++-- 10 files changed, 6 insertions(+), 8 deletions(-) rename torchbenchmark/{models => canary_models}/DALLE2_pytorch/__init__.py (100%) rename torchbenchmark/{models => canary_models}/DALLE2_pytorch/dalle2_pytorch.patch (100%) rename torchbenchmark/{models => canary_models}/DALLE2_pytorch/install.py (100%) rename torchbenchmark/{models => canary_models}/DALLE2_pytorch/metadata.yaml (100%) rename torchbenchmark/{models => canary_models}/DALLE2_pytorch/origin (100%) rename torchbenchmark/{models => canary_models}/DALLE2_pytorch/requirements.txt (100%) diff --git a/requirements.txt b/requirements.txt index 3a1fe44421..b9d52c5b24 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,9 +15,7 @@ transformers==4.38.1 MonkeyType psutil pyyaml -# pytorch build script pins numpy version -# https://github.com/pytorch/builder/blob/main/wheel/build_wheel.sh -numpy==1.23.5 +numpy opencv-python submitit pynvml diff --git a/torchbenchmark/models/DALLE2_pytorch/__init__.py b/torchbenchmark/canary_models/DALLE2_pytorch/__init__.py similarity index 100% rename from torchbenchmark/models/DALLE2_pytorch/__init__.py rename to torchbenchmark/canary_models/DALLE2_pytorch/__init__.py diff --git a/torchbenchmark/models/DALLE2_pytorch/dalle2_pytorch.patch b/torchbenchmark/canary_models/DALLE2_pytorch/dalle2_pytorch.patch similarity index 100% rename from torchbenchmark/models/DALLE2_pytorch/dalle2_pytorch.patch rename to torchbenchmark/canary_models/DALLE2_pytorch/dalle2_pytorch.patch diff --git a/torchbenchmark/models/DALLE2_pytorch/install.py b/torchbenchmark/canary_models/DALLE2_pytorch/install.py similarity index 100% rename from torchbenchmark/models/DALLE2_pytorch/install.py rename to torchbenchmark/canary_models/DALLE2_pytorch/install.py diff --git a/torchbenchmark/models/DALLE2_pytorch/metadata.yaml b/torchbenchmark/canary_models/DALLE2_pytorch/metadata.yaml similarity index 100% rename from torchbenchmark/models/DALLE2_pytorch/metadata.yaml rename to torchbenchmark/canary_models/DALLE2_pytorch/metadata.yaml diff --git a/torchbenchmark/models/DALLE2_pytorch/origin b/torchbenchmark/canary_models/DALLE2_pytorch/origin similarity index 100% rename from torchbenchmark/models/DALLE2_pytorch/origin rename to torchbenchmark/canary_models/DALLE2_pytorch/origin diff --git a/torchbenchmark/models/DALLE2_pytorch/requirements.txt b/torchbenchmark/canary_models/DALLE2_pytorch/requirements.txt similarity index 100% rename from torchbenchmark/models/DALLE2_pytorch/requirements.txt rename to torchbenchmark/canary_models/DALLE2_pytorch/requirements.txt diff --git a/torchbenchmark/models/doctr_det_predictor/requirements.txt b/torchbenchmark/models/doctr_det_predictor/requirements.txt index a291164829..c0780e92a9 100644 --- a/torchbenchmark/models/doctr_det_predictor/requirements.txt +++ b/torchbenchmark/models/doctr_det_predictor/requirements.txt @@ -1 +1 @@ -git+https://github.com/mindee/doctr.git@56c8356 +python-doctr diff --git a/torchbenchmark/models/doctr_reco_predictor/requirements.txt b/torchbenchmark/models/doctr_reco_predictor/requirements.txt index 2f9c5c918f..76de2bd25e 100644 --- a/torchbenchmark/models/doctr_reco_predictor/requirements.txt +++ b/torchbenchmark/models/doctr_reco_predictor/requirements.txt @@ -1,2 +1,2 @@ -git+https://github.com/mindee/doctr.git@acb9f64 -rapidfuzz==2.15.1 +python-doctr +rapidfuzz diff --git a/torchbenchmark/models/timm_efficientdet/requirements.txt b/torchbenchmark/models/timm_efficientdet/requirements.txt index 2b82a288e8..2e3c8a1c8d 100644 --- a/torchbenchmark/models/timm_efficientdet/requirements.txt +++ b/torchbenchmark/models/timm_efficientdet/requirements.txt @@ -1,2 +1,2 @@ -pycocotools==2.0.6 -git+https://github.com/rwightman/efficientdet-pytorch.git@79d26d8 +pycocotools +git+https://github.com/rwightman/efficientdet-pytorch.git@d43c9e34cd62d22b4205831bb735f6dd83b8e881 From 006f48266555a53193177ca972fe5c4ac86b223e Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Tue, 18 Jun 2024 08:56:21 -0700 Subject: [PATCH 02/31] Move doctr models to canary as they depend on numpy<2.0 --- .../{models => canary_models}/doctr_det_predictor/__init__.py | 0 .../{models => canary_models}/doctr_det_predictor/install.py | 0 .../{models => canary_models}/doctr_det_predictor/metadata.yaml | 0 .../doctr_det_predictor/requirements.txt | 0 .../{models => canary_models}/doctr_reco_predictor/__init__.py | 0 .../{models => canary_models}/doctr_reco_predictor/install.py | 0 .../{models => canary_models}/doctr_reco_predictor/metadata.yaml | 0 .../doctr_reco_predictor/requirements.txt | 0 8 files changed, 0 insertions(+), 0 deletions(-) rename torchbenchmark/{models => canary_models}/doctr_det_predictor/__init__.py (100%) rename torchbenchmark/{models => canary_models}/doctr_det_predictor/install.py (100%) rename torchbenchmark/{models => canary_models}/doctr_det_predictor/metadata.yaml (100%) rename torchbenchmark/{models => canary_models}/doctr_det_predictor/requirements.txt (100%) rename torchbenchmark/{models => canary_models}/doctr_reco_predictor/__init__.py (100%) rename torchbenchmark/{models => canary_models}/doctr_reco_predictor/install.py (100%) rename torchbenchmark/{models => canary_models}/doctr_reco_predictor/metadata.yaml (100%) rename torchbenchmark/{models => canary_models}/doctr_reco_predictor/requirements.txt (100%) diff --git a/torchbenchmark/models/doctr_det_predictor/__init__.py b/torchbenchmark/canary_models/doctr_det_predictor/__init__.py similarity index 100% rename from torchbenchmark/models/doctr_det_predictor/__init__.py rename to torchbenchmark/canary_models/doctr_det_predictor/__init__.py diff --git a/torchbenchmark/models/doctr_det_predictor/install.py b/torchbenchmark/canary_models/doctr_det_predictor/install.py similarity index 100% rename from torchbenchmark/models/doctr_det_predictor/install.py rename to torchbenchmark/canary_models/doctr_det_predictor/install.py diff --git a/torchbenchmark/models/doctr_det_predictor/metadata.yaml b/torchbenchmark/canary_models/doctr_det_predictor/metadata.yaml similarity index 100% rename from torchbenchmark/models/doctr_det_predictor/metadata.yaml rename to torchbenchmark/canary_models/doctr_det_predictor/metadata.yaml diff --git a/torchbenchmark/models/doctr_det_predictor/requirements.txt b/torchbenchmark/canary_models/doctr_det_predictor/requirements.txt similarity index 100% rename from torchbenchmark/models/doctr_det_predictor/requirements.txt rename to torchbenchmark/canary_models/doctr_det_predictor/requirements.txt diff --git a/torchbenchmark/models/doctr_reco_predictor/__init__.py b/torchbenchmark/canary_models/doctr_reco_predictor/__init__.py similarity index 100% rename from torchbenchmark/models/doctr_reco_predictor/__init__.py rename to torchbenchmark/canary_models/doctr_reco_predictor/__init__.py diff --git a/torchbenchmark/models/doctr_reco_predictor/install.py b/torchbenchmark/canary_models/doctr_reco_predictor/install.py similarity index 100% rename from torchbenchmark/models/doctr_reco_predictor/install.py rename to torchbenchmark/canary_models/doctr_reco_predictor/install.py diff --git a/torchbenchmark/models/doctr_reco_predictor/metadata.yaml b/torchbenchmark/canary_models/doctr_reco_predictor/metadata.yaml similarity index 100% rename from torchbenchmark/models/doctr_reco_predictor/metadata.yaml rename to torchbenchmark/canary_models/doctr_reco_predictor/metadata.yaml diff --git a/torchbenchmark/models/doctr_reco_predictor/requirements.txt b/torchbenchmark/canary_models/doctr_reco_predictor/requirements.txt similarity index 100% rename from torchbenchmark/models/doctr_reco_predictor/requirements.txt rename to torchbenchmark/canary_models/doctr_reco_predictor/requirements.txt From 42bb7524d8a26df319d9085d0bcedb0db7299ee9 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Tue, 18 Jun 2024 09:02:30 -0700 Subject: [PATCH 03/31] Fix doctr models dependency --- .../canary_models/doctr_det_predictor/requirements.txt | 1 - .../canary_models/doctr_reco_predictor/requirements.txt | 2 -- .../{canary_models => models}/doctr_det_predictor/__init__.py | 0 .../{canary_models => models}/doctr_det_predictor/install.py | 0 .../{canary_models => models}/doctr_det_predictor/metadata.yaml | 0 torchbenchmark/models/doctr_det_predictor/requirements.txt | 1 + .../{canary_models => models}/doctr_reco_predictor/__init__.py | 0 .../{canary_models => models}/doctr_reco_predictor/install.py | 0 .../doctr_reco_predictor/metadata.yaml | 0 torchbenchmark/models/doctr_reco_predictor/requirements.txt | 2 ++ 10 files changed, 3 insertions(+), 3 deletions(-) delete mode 100644 torchbenchmark/canary_models/doctr_det_predictor/requirements.txt delete mode 100644 torchbenchmark/canary_models/doctr_reco_predictor/requirements.txt rename torchbenchmark/{canary_models => models}/doctr_det_predictor/__init__.py (100%) rename torchbenchmark/{canary_models => models}/doctr_det_predictor/install.py (100%) rename torchbenchmark/{canary_models => models}/doctr_det_predictor/metadata.yaml (100%) create mode 100644 torchbenchmark/models/doctr_det_predictor/requirements.txt rename torchbenchmark/{canary_models => models}/doctr_reco_predictor/__init__.py (100%) rename torchbenchmark/{canary_models => models}/doctr_reco_predictor/install.py (100%) rename torchbenchmark/{canary_models => models}/doctr_reco_predictor/metadata.yaml (100%) create mode 100644 torchbenchmark/models/doctr_reco_predictor/requirements.txt diff --git a/torchbenchmark/canary_models/doctr_det_predictor/requirements.txt b/torchbenchmark/canary_models/doctr_det_predictor/requirements.txt deleted file mode 100644 index c0780e92a9..0000000000 --- a/torchbenchmark/canary_models/doctr_det_predictor/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -python-doctr diff --git a/torchbenchmark/canary_models/doctr_reco_predictor/requirements.txt b/torchbenchmark/canary_models/doctr_reco_predictor/requirements.txt deleted file mode 100644 index 76de2bd25e..0000000000 --- a/torchbenchmark/canary_models/doctr_reco_predictor/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -python-doctr -rapidfuzz diff --git a/torchbenchmark/canary_models/doctr_det_predictor/__init__.py b/torchbenchmark/models/doctr_det_predictor/__init__.py similarity index 100% rename from torchbenchmark/canary_models/doctr_det_predictor/__init__.py rename to torchbenchmark/models/doctr_det_predictor/__init__.py diff --git a/torchbenchmark/canary_models/doctr_det_predictor/install.py b/torchbenchmark/models/doctr_det_predictor/install.py similarity index 100% rename from torchbenchmark/canary_models/doctr_det_predictor/install.py rename to torchbenchmark/models/doctr_det_predictor/install.py diff --git a/torchbenchmark/canary_models/doctr_det_predictor/metadata.yaml b/torchbenchmark/models/doctr_det_predictor/metadata.yaml similarity index 100% rename from torchbenchmark/canary_models/doctr_det_predictor/metadata.yaml rename to torchbenchmark/models/doctr_det_predictor/metadata.yaml diff --git a/torchbenchmark/models/doctr_det_predictor/requirements.txt b/torchbenchmark/models/doctr_det_predictor/requirements.txt new file mode 100644 index 0000000000..83b886053e --- /dev/null +++ b/torchbenchmark/models/doctr_det_predictor/requirements.txt @@ -0,0 +1 @@ +git+https://github.com/xuzhao9/doctr.git diff --git a/torchbenchmark/canary_models/doctr_reco_predictor/__init__.py b/torchbenchmark/models/doctr_reco_predictor/__init__.py similarity index 100% rename from torchbenchmark/canary_models/doctr_reco_predictor/__init__.py rename to torchbenchmark/models/doctr_reco_predictor/__init__.py diff --git a/torchbenchmark/canary_models/doctr_reco_predictor/install.py b/torchbenchmark/models/doctr_reco_predictor/install.py similarity index 100% rename from torchbenchmark/canary_models/doctr_reco_predictor/install.py rename to torchbenchmark/models/doctr_reco_predictor/install.py diff --git a/torchbenchmark/canary_models/doctr_reco_predictor/metadata.yaml b/torchbenchmark/models/doctr_reco_predictor/metadata.yaml similarity index 100% rename from torchbenchmark/canary_models/doctr_reco_predictor/metadata.yaml rename to torchbenchmark/models/doctr_reco_predictor/metadata.yaml diff --git a/torchbenchmark/models/doctr_reco_predictor/requirements.txt b/torchbenchmark/models/doctr_reco_predictor/requirements.txt new file mode 100644 index 0000000000..7bbe0c9b30 --- /dev/null +++ b/torchbenchmark/models/doctr_reco_predictor/requirements.txt @@ -0,0 +1,2 @@ +git+https://github.com/xuzhao9/doctr.git +rapidfuzz From 23512dbebd44a11eb84afbf53c3c071dd105297e Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Tue, 18 Jun 2024 21:06:39 -0700 Subject: [PATCH 04/31] Pin numpy version --- requirements.txt | 2 +- torchbenchmark/models/Background_Matting/requirements.txt | 2 +- torchbenchmark/models/hf_Whisper/requirements.txt | 3 ++- torchbenchmark/models/tacotron2/requirements.txt | 2 +- torchbenchmark/util/framework/detectron2/requirements.txt | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index b9d52c5b24..6e209de3e3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,7 +15,7 @@ transformers==4.38.1 MonkeyType psutil pyyaml -numpy +numpy==1.21.2 opencv-python submitit pynvml diff --git a/torchbenchmark/models/Background_Matting/requirements.txt b/torchbenchmark/models/Background_Matting/requirements.txt index 188e465c72..a36947527e 100644 --- a/torchbenchmark/models/Background_Matting/requirements.txt +++ b/torchbenchmark/models/Background_Matting/requirements.txt @@ -1,5 +1,5 @@ -numpy opencv-python +numpy==1.21.2 pandas Pillow scikit-image diff --git a/torchbenchmark/models/hf_Whisper/requirements.txt b/torchbenchmark/models/hf_Whisper/requirements.txt index fd0728f16f..fb91cc87b5 100644 --- a/torchbenchmark/models/hf_Whisper/requirements.txt +++ b/torchbenchmark/models/hf_Whisper/requirements.txt @@ -1 +1,2 @@ -numba \ No newline at end of file +numba +numpy==1.21.2 diff --git a/torchbenchmark/models/tacotron2/requirements.txt b/torchbenchmark/models/tacotron2/requirements.txt index 39e8afb167..598a2b11d2 100644 --- a/torchbenchmark/models/tacotron2/requirements.txt +++ b/torchbenchmark/models/tacotron2/requirements.txt @@ -1,4 +1,4 @@ -numpy +numpy==1.21.2 inflect scipy Unidecode diff --git a/torchbenchmark/util/framework/detectron2/requirements.txt b/torchbenchmark/util/framework/detectron2/requirements.txt index b484339e94..8c9ea84c1b 100644 --- a/torchbenchmark/util/framework/detectron2/requirements.txt +++ b/torchbenchmark/util/framework/detectron2/requirements.txt @@ -1,3 +1,3 @@ git+https://github.com/facebookresearch/detectron2.git@1a4df4d omegaconf==2.3.0 -numpy +numpy==1.21.2 From e522b45cd4535b9dfe067aa68d7315755df38f48 Mon Sep 17 00:00:00 2001 From: Wouter Devriendt Date: Tue, 22 Oct 2024 00:41:42 -0700 Subject: [PATCH 05/31] Update requirements.txt (#2523) Summary: attempt to fix dependencies - this is no longer compatible with the latest huggingface_hub, see failing test at https://github.com/pytorch/pytorch/actions/runs/11445304501/job/31843081598 Pull Request resolved: https://github.com/pytorch/benchmark/pull/2523 Reviewed By: huydhn Differential Revision: D64711662 Pulled By: wdvr fbshipit-source-id: eed9143e6e0531840a53ba5ab3fad04894727272 --- torchbenchmark/util/framework/diffusers/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchbenchmark/util/framework/diffusers/requirements.txt b/torchbenchmark/util/framework/diffusers/requirements.txt index 5753b0d36e..5868587596 100644 --- a/torchbenchmark/util/framework/diffusers/requirements.txt +++ b/torchbenchmark/util/framework/diffusers/requirements.txt @@ -1 +1 @@ -diffusers==0.20.2 \ No newline at end of file +diffusers==0.30.3 From 0bc829a10fb5d445d82c8bd5f39818172f261ff0 Mon Sep 17 00:00:00 2001 From: Mikayla Gawarecki Date: Tue, 22 Oct 2024 08:29:59 -0700 Subject: [PATCH 06/31] Fixes to prep for weights_only default flip (#2514) Summary: Some fixes for https://github.com/pytorch/pytorch/pull/137602 Pull Request resolved: https://github.com/pytorch/benchmark/pull/2514 Reviewed By: xuzhao9 Differential Revision: D64628614 Pulled By: mikaylagawarecki fbshipit-source-id: edebf25cc6648919d5673a3baeaffdac26e5b91f --- .../models/functorch_maml_omniglot/__init__.py | 18 ++++++++++++++++-- .../models/maml_omniglot/__init__.py | 16 ++++++++++++++-- .../models/opacus_cifar10/__init__.py | 8 ++++++++ 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/torchbenchmark/models/functorch_maml_omniglot/__init__.py b/torchbenchmark/models/functorch_maml_omniglot/__init__.py index 84fb716ed4..7258dddec8 100644 --- a/torchbenchmark/models/functorch_maml_omniglot/__init__.py +++ b/torchbenchmark/models/functorch_maml_omniglot/__init__.py @@ -7,6 +7,7 @@ from pathlib import Path from typing import Tuple +import numpy as np from ...util.model import BenchmarkModel from torchbenchmark.tasks import OTHER @@ -70,8 +71,21 @@ def __init__(self, test, device, batch_size=None, extra_args=[]): self.model = net root = str(Path(__file__).parent.parent) - self.meta_inputs = torch.load(f'{root}/maml_omniglot/batch.pt') - self.meta_inputs = tuple([torch.from_numpy(i).to(self.device) for i in self.meta_inputs]) + with torch.serialization.safe_globals( + [ + np.core.multiarray._reconstruct, + np.ndarray, + np.dtype, + np.dtypes.Float32DType, + np.dtypes.Int64DType, + ] + ): + self.meta_inputs = torch.load( + f"{root}/maml_omniglot/batch.pt", weights_only=True + ) + self.meta_inputs = tuple( + [torch.from_numpy(i).to(self.device) for i in self.meta_inputs] + ) self.example_inputs = (self.meta_inputs[0][0],) def get_module(self): diff --git a/torchbenchmark/models/maml_omniglot/__init__.py b/torchbenchmark/models/maml_omniglot/__init__.py index ac36728203..6096e8f71e 100644 --- a/torchbenchmark/models/maml_omniglot/__init__.py +++ b/torchbenchmark/models/maml_omniglot/__init__.py @@ -16,6 +16,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import numpy as np import torch import torch.optim as optim import torch.nn as nn @@ -75,8 +76,19 @@ def __init__(self, test, device, batch_size=None, extra_args=[]): self.model = net root = str(Path(__file__).parent) - self.meta_inputs = torch.load(f'{root}/batch.pt') - self.meta_inputs = tuple([torch.from_numpy(i).to(self.device) for i in self.meta_inputs]) + with torch.serialization.safe_globals( + [ + np.core.multiarray._reconstruct, + np.ndarray, + np.dtype, + np.dtypes.Float32DType, + np.dtypes.Int64DType, + ] + ): + self.meta_inputs = torch.load(f"{root}/batch.pt", weights_only=True) + self.meta_inputs = tuple( + [torch.from_numpy(i).to(self.device) for i in self.meta_inputs] + ) self.example_inputs = (self.meta_inputs[0][0],) def get_module(self): diff --git a/torchbenchmark/models/opacus_cifar10/__init__.py b/torchbenchmark/models/opacus_cifar10/__init__.py index f66a49dbc2..95072916d1 100644 --- a/torchbenchmark/models/opacus_cifar10/__init__.py +++ b/torchbenchmark/models/opacus_cifar10/__init__.py @@ -1,3 +1,5 @@ +import os + import torch import torch.optim as optim import torch.nn as nn @@ -24,7 +26,13 @@ def __init__(self, test, device, batch_size=None, extra_args=[]): super().__init__(test=test, device=device, batch_size=batch_size, extra_args=extra_args) self.model = models.resnet18(num_classes=10) + prev_wo_envvar = os.environ.get("TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD", None) + os.environ["TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD"] = "1" self.model = ModuleValidator.fix(self.model) + if prev_wo_envvar is None: + del os.environ["TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD"] + else: + os.environ["TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD"] = prev_wo_envvar self.model = self.model.to(device) # Cifar10 images are 32x32 and have 10 classes From 766a5e3a189384659fd35a68c3b17b88c761aaac Mon Sep 17 00:00:00 2001 From: Mikayla Gawarecki Date: Fri, 25 Oct 2024 10:25:04 -0700 Subject: [PATCH 07/31] Account for older numpy versions in #2514 (#2524) Summary: Pull Request resolved: https://github.com/pytorch/benchmark/pull/2524 Reviewed By: kit1980 Differential Revision: D64771621 Pulled By: mikaylagawarecki fbshipit-source-id: 545f3d528cfbe2668c8d37e98e99423cd77a8e8e --- .../models/functorch_maml_omniglot/__init__.py | 12 ++++++++++-- torchbenchmark/models/maml_omniglot/__init__.py | 12 ++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/torchbenchmark/models/functorch_maml_omniglot/__init__.py b/torchbenchmark/models/functorch_maml_omniglot/__init__.py index 7258dddec8..c79acbceed 100644 --- a/torchbenchmark/models/functorch_maml_omniglot/__init__.py +++ b/torchbenchmark/models/functorch_maml_omniglot/__init__.py @@ -76,8 +76,16 @@ def __init__(self, test, device, batch_size=None, extra_args=[]): np.core.multiarray._reconstruct, np.ndarray, np.dtype, - np.dtypes.Float32DType, - np.dtypes.Int64DType, + ( + type(np.dtype(np.float32)) + if np.__version__ < "1.25.0" + else np.dtypes.Float32DType + ), + ( + type(np.dtype(np.int64)) + if np.__version__ < "1.25.0" + else np.dtypes.Int64DType + ), ] ): self.meta_inputs = torch.load( diff --git a/torchbenchmark/models/maml_omniglot/__init__.py b/torchbenchmark/models/maml_omniglot/__init__.py index 6096e8f71e..0078ecd857 100644 --- a/torchbenchmark/models/maml_omniglot/__init__.py +++ b/torchbenchmark/models/maml_omniglot/__init__.py @@ -81,8 +81,16 @@ def __init__(self, test, device, batch_size=None, extra_args=[]): np.core.multiarray._reconstruct, np.ndarray, np.dtype, - np.dtypes.Float32DType, - np.dtypes.Int64DType, + ( + type(np.dtype(np.float32)) + if np.__version__ < "1.25.0" + else np.dtypes.Float32DType + ), + ( + type(np.dtype(np.int64)) + if np.__version__ < "1.25.0" + else np.dtypes.Int64DType + ), ] ): self.meta_inputs = torch.load(f"{root}/batch.pt", weights_only=True) From d3cc4969c9bdb04d394be3d07e587e36415a6008 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Mon, 16 Dec 2024 21:15:00 -0800 Subject: [PATCH 08/31] Resurrect ao benchmark --- .github/workflows/_linux-benchmark-cuda.yml | 8 +------- .github/workflows/torchao.yml | 8 +++++--- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml index e1814d869f..9d5e01ad06 100644 --- a/.github/workflows/_linux-benchmark-cuda.yml +++ b/.github/workflows/_linux-benchmark-cuda.yml @@ -30,7 +30,7 @@ jobs: benchmark: # Don't run on forked repos if: github.repository_owner == 'pytorch' - runs-on: [a100-runner] + runs-on: linux.aws.a100 timeout-minutes: 1440 # 24 hours environment: docker-s3-upload env: @@ -45,12 +45,6 @@ jobs: uses: actions/checkout@v3 with: path: benchmark - - name: Tune Nvidia GPU - run: | - sudo nvidia-smi -pm 1 - sudo nvidia-smi -ac 1215,1410 - sudo ldconfig - nvidia-smi - name: Remove result if it already exists if: always() run: | diff --git a/.github/workflows/torchao.yml b/.github/workflows/torchao.yml index b5d43d9b9b..99c5d95ff1 100644 --- a/.github/workflows/torchao.yml +++ b/.github/workflows/torchao.yml @@ -6,9 +6,10 @@ on: jobs: timm: + if: false uses: ./.github/workflows/_linux-benchmark-cuda.yml with: - userbenchmark: "torchao" + userbenchmark: torchao userbenchmark-run-args: "--ci --dashboard --timm" secrets: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} @@ -17,16 +18,17 @@ jobs: torchbench: uses: ./.github/workflows/_linux-benchmark-cuda.yml with: - userbenchmark: "torchao" + userbenchmark: torchao userbenchmark-run-args: "--ci --dashboard --torchbench" secrets: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} huggingface: + if: false uses: ./.github/workflows/_linux-benchmark-cuda.yml with: - userbenchmark: "torchao" + userbenchmark: torchao userbenchmark-run-args: "--ci --dashboard --huggingface" secrets: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} From 9b24c0a9b6e0c186c8dbb9af21e8a43f3f790b26 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Mon, 16 Dec 2024 21:41:51 -0800 Subject: [PATCH 09/31] Try schema v3 --- .github/workflows/_linux-benchmark-cuda.yml | 92 ++++++++------------- .github/workflows/torchao.yml | 2 + userbenchmark/torchao/run.py | 1 + 3 files changed, 38 insertions(+), 57 deletions(-) diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml index 9d5e01ad06..a85ff252e8 100644 --- a/.github/workflows/_linux-benchmark-cuda.yml +++ b/.github/workflows/_linux-benchmark-cuda.yml @@ -15,81 +15,59 @@ on: required: false description: | HF auth token to avoid rate limits when downloading models or datasets from hub - AWS_ACCESS_KEY_ID: - required: true - description: | - AWS access token for S3 uploading - AWS_SECRET_ACCESS_KEY: - required: true - description: | - AWS secret access key for S3 uploading jobs: - # Run a specific userbenchmark with given arguments - # Need to pass in userbenchmark name and arguments benchmark: # Don't run on forked repos if: github.repository_owner == 'pytorch' runs-on: linux.aws.a100 - timeout-minutes: 1440 # 24 hours + timeout-minutes: 1440 environment: docker-s3-upload env: - BASE_CONDA_ENV: "torchbench" - CONDA_ENV: "userbenchmark" - SETUP_SCRIPT: "/workspace/setup_instance.sh" + OUTPUT_DIR: '.userbenchmark' HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} steps: - name: Checkout TorchBench uses: actions/checkout@v3 with: path: benchmark + - name: Remove result if it already exists - if: always() - run: | - # remove old results if exists - if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi - pushd benchmark - if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi - - name: Clone and setup conda env + shell: bash + working-directory: benchmark run: | - CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}" - conda create --name "${CONDA_ENV}" --clone "${BASE_CONDA_ENV}" + set -eux + + if [[ -d "${OUTPUT_DIR}" ]]; then + rm -rf "${OUTPUT_DIR}" + fi + + - name: Setup miniconda + uses: pytorch/test-infra/.github/actions/setup-miniconda@main + with: + python-version: "3.9" + - name: Install benchmark + shell: bash + working-directory: benchmark run: | - . "${SETUP_SCRIPT}" - pushd benchmark - python install.py + set -eux + ${CONDA_RUN} python install.py + - name: Run benchmark + shell: bash + working-directory: benchmark run: | - . "${SETUP_SCRIPT}" - pushd benchmark - python run_benchmark.py ${{ inputs.userbenchmark }} ${{ inputs.userbenchmark-run-args }} - - name: Copy benchmark logs - if: always() - run: | - pushd benchmark - cp -r ./.userbenchmark/${{ inputs.userbenchmark }} ../benchmark-output - - name: Upload benchmark result to GH Actions Artifact - uses: actions/upload-artifact@v3 - if: always() + set -eux + ${CONDA_RUN} python run_benchmark.py ${{ inputs.userbenchmark }} ${{ inputs.userbenchmark-run-args }} + + # DEBUG + ls -laR "${OUTPUT_DIR}" + + - name: Upload the benchmark results to OSS benchmark database for the dashboard + uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main with: - name: ${{ inputs.userbenchmark }} benchmarking result - path: benchmark-output/ - - name: Copy artifact and upload to Amazon S3 - env: - WORKFLOW_RUN_ID: ${{ github.run_id }} - WORKFLOW_RUN_ATTEMPT: ${{ github.run_attempt }} - run: | - . "${SETUP_SCRIPT}" - pushd benchmark - # Upload the result json to Amazon S3 - python ./scripts/userbenchmark/upload_s3_csv.py --s3-prefix torchbench-csv --userbenchmark ${{ inputs.userbenchmark }} \ - --upload-path ../benchmark-output --match-filename "^${{ inputs.userbenchmark }}.*\.csv" - - name: Clean up Conda env - if: always() - run: | - . "${SETUP_SCRIPT}" - conda deactivate && conda deactivate - conda remove -n "${CONDA_ENV}" --all + benchmark-results-dir: ${{ env.OUTPUT_DIR }} + dry-run: true # DEBUG: TO BE REMOVED + schema-version: v3 + github-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/torchao.yml b/.github/workflows/torchao.yml index 99c5d95ff1..a570647f8d 100644 --- a/.github/workflows/torchao.yml +++ b/.github/workflows/torchao.yml @@ -1,5 +1,7 @@ name: Torchao nightly workflow (A100) on: + # DEBUG + pull_request: workflow_dispatch: schedule: - cron: '00 18 * * *' # run at 6:00 PM UTC, K8s containers will roll out at 12PM EST diff --git a/userbenchmark/torchao/run.py b/userbenchmark/torchao/run.py index 626ae55cb0..b2f67f6848 100644 --- a/userbenchmark/torchao/run.py +++ b/userbenchmark/torchao/run.py @@ -29,6 +29,7 @@ def _get_ci_args( f"--{experiment}", "--output", f"{str(OUTPUT_DIR.joinpath(output_file_name).resolve())}", + "--only hf_T5", # DEBUG: TO BE REMOVED ] return ci_args From d6c8d6dcfa5e6ad0b60e44829acb3cc98e253d8f Mon Sep 17 00:00:00 2001 From: Huy Do Date: Mon, 16 Dec 2024 21:47:12 -0800 Subject: [PATCH 10/31] Cleanup AWS credential --- .github/workflows/torchao.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/torchao.yml b/.github/workflows/torchao.yml index a570647f8d..d6ac97bba0 100644 --- a/.github/workflows/torchao.yml +++ b/.github/workflows/torchao.yml @@ -15,8 +15,6 @@ jobs: userbenchmark-run-args: "--ci --dashboard --timm" secrets: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} torchbench: uses: ./.github/workflows/_linux-benchmark-cuda.yml with: @@ -24,8 +22,6 @@ jobs: userbenchmark-run-args: "--ci --dashboard --torchbench" secrets: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} huggingface: if: false uses: ./.github/workflows/_linux-benchmark-cuda.yml @@ -34,8 +30,6 @@ jobs: userbenchmark-run-args: "--ci --dashboard --huggingface" secrets: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} From f8f53691bc40868e731ec7624f529e8cfa18ad5a Mon Sep 17 00:00:00 2001 From: Huy Do Date: Mon, 16 Dec 2024 21:54:40 -0800 Subject: [PATCH 11/31] Install requirements.txt --- .github/workflows/_linux-benchmark-cuda.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml index a85ff252e8..f6c96cf2e0 100644 --- a/.github/workflows/_linux-benchmark-cuda.yml +++ b/.github/workflows/_linux-benchmark-cuda.yml @@ -52,6 +52,7 @@ jobs: working-directory: benchmark run: | set -eux + ${CONDA_RUN} pip install -r requirements.txt ${CONDA_RUN} python install.py - name: Run benchmark From 46c7b24fed3f9926ac712f4cbf6e5ff97a85b200 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Mon, 16 Dec 2024 22:15:16 -0800 Subject: [PATCH 12/31] Install torch --- .github/workflows/_linux-benchmark-cuda.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml index f6c96cf2e0..15c44c25ec 100644 --- a/.github/workflows/_linux-benchmark-cuda.yml +++ b/.github/workflows/_linux-benchmark-cuda.yml @@ -47,12 +47,19 @@ jobs: with: python-version: "3.9" - - name: Install benchmark + - name: Install dependencies shell: bash working-directory: benchmark run: | set -eux + ${CONDA_RUN} pip install torch torchvision torchaudio ${CONDA_RUN} pip install -r requirements.txt + + - name: Install benchmark + shell: bash + working-directory: benchmark + run: | + set -eux ${CONDA_RUN} python install.py - name: Run benchmark From 2fd4a79e4b37e87fd579c4e3a30118d295e7af6b Mon Sep 17 00:00:00 2001 From: Huy Do Date: Mon, 16 Dec 2024 22:36:31 -0800 Subject: [PATCH 13/31] Try one model first --- .github/workflows/_linux-benchmark-cuda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml index 15c44c25ec..5cad073ac5 100644 --- a/.github/workflows/_linux-benchmark-cuda.yml +++ b/.github/workflows/_linux-benchmark-cuda.yml @@ -60,7 +60,7 @@ jobs: working-directory: benchmark run: | set -eux - ${CONDA_RUN} python install.py + ${CONDA_RUN} python install.py --models hf_T5 - name: Run benchmark shell: bash From 24aab2295dc0f359c75be9e08440ce4e9a842975 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Mon, 16 Dec 2024 23:04:11 -0800 Subject: [PATCH 14/31] It's confusing --- .github/workflows/_linux-benchmark-cuda.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml index 5cad073ac5..3a412d9dcf 100644 --- a/.github/workflows/_linux-benchmark-cuda.yml +++ b/.github/workflows/_linux-benchmark-cuda.yml @@ -53,7 +53,6 @@ jobs: run: | set -eux ${CONDA_RUN} pip install torch torchvision torchaudio - ${CONDA_RUN} pip install -r requirements.txt - name: Install benchmark shell: bash From 9c0367f2d65aaa5bd7943b1a82afdb3e5d3fdd03 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Mon, 16 Dec 2024 23:14:39 -0800 Subject: [PATCH 15/31] Testing pinning numpy --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 1782092c70..da3d4978f2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,7 +16,7 @@ transformers==4.44.2 MonkeyType psutil pyyaml -numpy +numpy==1.21.2 opencv-python submitit pynvml>=12.0.0 From c8105648a3f17b4c3aaa0a68c6991a735aea1dae Mon Sep 17 00:00:00 2001 From: Huy Do Date: Mon, 16 Dec 2024 23:24:52 -0800 Subject: [PATCH 16/31] Resolve conflict --- torchbenchmark/models/hf_Whisper/requirements.txt | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 torchbenchmark/models/hf_Whisper/requirements.txt diff --git a/torchbenchmark/models/hf_Whisper/requirements.txt b/torchbenchmark/models/hf_Whisper/requirements.txt deleted file mode 100644 index fb91cc87b5..0000000000 --- a/torchbenchmark/models/hf_Whisper/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -numba -numpy==1.21.2 From af7c2675e93f74daeb9c2742c6f2b83c0e7105d6 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Mon, 16 Dec 2024 23:27:12 -0800 Subject: [PATCH 17/31] Clean up a bit more --- torchbenchmark/models/maml_omniglot/__init__.py | 5 ----- torchbenchmark/models/opacus_cifar10/__init__.py | 7 ------- 2 files changed, 12 deletions(-) diff --git a/torchbenchmark/models/maml_omniglot/__init__.py b/torchbenchmark/models/maml_omniglot/__init__.py index 48b6a75062..4b0a84ab8d 100644 --- a/torchbenchmark/models/maml_omniglot/__init__.py +++ b/torchbenchmark/models/maml_omniglot/__init__.py @@ -16,11 +16,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -import torch -import torch.optim as optim -import torch.nn as nn -import torch.nn.functional as F from pathlib import Path from typing import Tuple diff --git a/torchbenchmark/models/opacus_cifar10/__init__.py b/torchbenchmark/models/opacus_cifar10/__init__.py index 51a5839a34..efab73b3ac 100644 --- a/torchbenchmark/models/opacus_cifar10/__init__.py +++ b/torchbenchmark/models/opacus_cifar10/__init__.py @@ -1,11 +1,4 @@ import os -import torch -import torch.optim as optim -import torch.nn as nn -import torch.utils.data as data -import torchvision.models as models -from opacus import PrivacyEngine -from opacus.validators.module_validator import ModuleValidator from typing import Tuple import torch From e25fde1dd3ae51b9c745eb713bb5b7dc66fb6cdc Mon Sep 17 00:00:00 2001 From: Huy Do Date: Mon, 16 Dec 2024 23:33:51 -0800 Subject: [PATCH 18/31] Wth --- requirements.txt | 2 +- torchbenchmark/models/Background_Matting/requirements.txt | 2 +- torchbenchmark/models/tacotron2/requirements.txt | 2 +- torchbenchmark/util/framework/detectron2/requirements.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index da3d4978f2..1782092c70 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,7 +16,7 @@ transformers==4.44.2 MonkeyType psutil pyyaml -numpy==1.21.2 +numpy opencv-python submitit pynvml>=12.0.0 diff --git a/torchbenchmark/models/Background_Matting/requirements.txt b/torchbenchmark/models/Background_Matting/requirements.txt index a36947527e..188e465c72 100644 --- a/torchbenchmark/models/Background_Matting/requirements.txt +++ b/torchbenchmark/models/Background_Matting/requirements.txt @@ -1,5 +1,5 @@ +numpy opencv-python -numpy==1.21.2 pandas Pillow scikit-image diff --git a/torchbenchmark/models/tacotron2/requirements.txt b/torchbenchmark/models/tacotron2/requirements.txt index 598a2b11d2..39e8afb167 100644 --- a/torchbenchmark/models/tacotron2/requirements.txt +++ b/torchbenchmark/models/tacotron2/requirements.txt @@ -1,4 +1,4 @@ -numpy==1.21.2 +numpy inflect scipy Unidecode diff --git a/torchbenchmark/util/framework/detectron2/requirements.txt b/torchbenchmark/util/framework/detectron2/requirements.txt index 2a049c08b1..f38075ddd7 100644 --- a/torchbenchmark/util/framework/detectron2/requirements.txt +++ b/torchbenchmark/util/framework/detectron2/requirements.txt @@ -1,3 +1,3 @@ git+https://github.com/facebookresearch/detectron2.git@0df2d73d0013db7de629602c23cc120219b4f2b8 omegaconf==2.3.0 -numpy==1.21.2 +numpy From 217720dfeb7250748bf7250ecf4e5e6e56735daa Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 17 Dec 2024 00:01:23 -0800 Subject: [PATCH 19/31] Maybe --- .github/workflows/_linux-benchmark-cuda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml index 3a412d9dcf..7460a53140 100644 --- a/.github/workflows/_linux-benchmark-cuda.yml +++ b/.github/workflows/_linux-benchmark-cuda.yml @@ -59,7 +59,7 @@ jobs: working-directory: benchmark run: | set -eux - ${CONDA_RUN} python install.py --models hf_T5 + ${CONDA_RUN} python install.py --numpy --models hf_T5 - name: Run benchmark shell: bash From 368caaf30f7df6e7badb0e3a5d9b8f562caf2609 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 17 Dec 2024 00:06:38 -0800 Subject: [PATCH 20/31] Finally --- .github/workflows/_linux-benchmark-cuda.yml | 2 +- userbenchmark/torchao/run.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml index 7460a53140..3bbaa1f12e 100644 --- a/.github/workflows/_linux-benchmark-cuda.yml +++ b/.github/workflows/_linux-benchmark-cuda.yml @@ -47,7 +47,7 @@ jobs: with: python-version: "3.9" - - name: Install dependencies + - name: Install torch dependencies shell: bash working-directory: benchmark run: | diff --git a/userbenchmark/torchao/run.py b/userbenchmark/torchao/run.py index b2f67f6848..ee035b5274 100644 --- a/userbenchmark/torchao/run.py +++ b/userbenchmark/torchao/run.py @@ -29,7 +29,8 @@ def _get_ci_args( f"--{experiment}", "--output", f"{str(OUTPUT_DIR.joinpath(output_file_name).resolve())}", - "--only hf_T5", # DEBUG: TO BE REMOVED + "--only", # DEBUG: TO BE REMOVED + "hf_T5", # DEBUG: TO BE REMOVED ] return ci_args From 6f42b08a90197de1b848c61742570c9038c643d6 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 17 Dec 2024 00:16:06 -0800 Subject: [PATCH 21/31] Install ao --- .github/workflows/_linux-benchmark-cuda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml index 3bbaa1f12e..f958612713 100644 --- a/.github/workflows/_linux-benchmark-cuda.yml +++ b/.github/workflows/_linux-benchmark-cuda.yml @@ -52,7 +52,7 @@ jobs: working-directory: benchmark run: | set -eux - ${CONDA_RUN} pip install torch torchvision torchaudio + ${CONDA_RUN} pip install torch torchvision torchaudio torchao - name: Install benchmark shell: bash From f28fa8648e4f7195ef63b2779c305e5abb5e5f89 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 17 Dec 2024 00:32:10 -0800 Subject: [PATCH 22/31] Install nightly --- .github/workflows/_linux-benchmark-cuda.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml index f958612713..db3827e369 100644 --- a/.github/workflows/_linux-benchmark-cuda.yml +++ b/.github/workflows/_linux-benchmark-cuda.yml @@ -52,7 +52,8 @@ jobs: working-directory: benchmark run: | set -eux - ${CONDA_RUN} pip install torch torchvision torchaudio torchao + ${CONDA_RUN} pip3 install --pre torch torchvision torchaudio torchao \ + --index-url https://download.pytorch.org/whl/nightly/cu124 - name: Install benchmark shell: bash From 54fe0527cd9da288b072761d9aaaada68e71bdfc Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 17 Dec 2024 00:50:50 -0800 Subject: [PATCH 23/31] Try another example --- .github/workflows/_linux-benchmark-cuda.yml | 2 +- userbenchmark/torchao/run.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml index db3827e369..3e0900c0fd 100644 --- a/.github/workflows/_linux-benchmark-cuda.yml +++ b/.github/workflows/_linux-benchmark-cuda.yml @@ -60,7 +60,7 @@ jobs: working-directory: benchmark run: | set -eux - ${CONDA_RUN} python install.py --numpy --models hf_T5 + ${CONDA_RUN} python install.py --numpy --models alexnet - name: Run benchmark shell: bash diff --git a/userbenchmark/torchao/run.py b/userbenchmark/torchao/run.py index ee035b5274..9279618504 100644 --- a/userbenchmark/torchao/run.py +++ b/userbenchmark/torchao/run.py @@ -30,7 +30,7 @@ def _get_ci_args( "--output", f"{str(OUTPUT_DIR.joinpath(output_file_name).resolve())}", "--only", # DEBUG: TO BE REMOVED - "hf_T5", # DEBUG: TO BE REMOVED + "alexnet", # DEBUG: TO BE REMOVED ] return ci_args From d0af2181392fa9cf18e4eb162a342aa946a2cba7 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 17 Dec 2024 15:57:08 -0800 Subject: [PATCH 24/31] Another attempt --- .github/workflows/_linux-benchmark-cuda.yml | 2 +- userbenchmark/torchao/run.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml index 3e0900c0fd..f64e15bf98 100644 --- a/.github/workflows/_linux-benchmark-cuda.yml +++ b/.github/workflows/_linux-benchmark-cuda.yml @@ -76,6 +76,6 @@ jobs: uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main with: benchmark-results-dir: ${{ env.OUTPUT_DIR }} - dry-run: true # DEBUG: TO BE REMOVED + dry-run: false schema-version: v3 github-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/userbenchmark/torchao/run.py b/userbenchmark/torchao/run.py index 9279618504..dea0dcfe03 100644 --- a/userbenchmark/torchao/run.py +++ b/userbenchmark/torchao/run.py @@ -36,7 +36,8 @@ def _get_ci_args( def _get_full_ci_args(modelset: str) -> List[List[str]]: - backends = ["autoquant", "int8dynamic", "int8weightonly", "noquant"] + # backends = ["autoquant", "int8dynamic", "int8weightonly", "noquant"] + backends = ["autoquant"] # DEBUG: TO BE REMOVED modelset = [modelset] dtype = ["bfloat16"] mode = ["inference"] From eb9671bf2d45a7e2b82909fd8ee0afbbde630865 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 17 Dec 2024 16:08:32 -0800 Subject: [PATCH 25/31] Use something smaller --- .github/workflows/_linux-benchmark-cuda.yml | 2 +- userbenchmark/torchao/run.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml index f64e15bf98..8a8fee78cb 100644 --- a/.github/workflows/_linux-benchmark-cuda.yml +++ b/.github/workflows/_linux-benchmark-cuda.yml @@ -60,7 +60,7 @@ jobs: working-directory: benchmark run: | set -eux - ${CONDA_RUN} python install.py --numpy --models alexnet + ${CONDA_RUN} python install.py --numpy --models resnet18 - name: Run benchmark shell: bash diff --git a/userbenchmark/torchao/run.py b/userbenchmark/torchao/run.py index dea0dcfe03..29d0a7fff5 100644 --- a/userbenchmark/torchao/run.py +++ b/userbenchmark/torchao/run.py @@ -30,7 +30,7 @@ def _get_ci_args( "--output", f"{str(OUTPUT_DIR.joinpath(output_file_name).resolve())}", "--only", # DEBUG: TO BE REMOVED - "alexnet", # DEBUG: TO BE REMOVED + "resnet18", # DEBUG: TO BE REMOVED ] return ci_args From 03f23a1f0ea44a0727798385643d7be69ff43f15 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 17 Dec 2024 22:56:20 -0800 Subject: [PATCH 26/31] Looks like it's working --- .github/workflows/_linux-benchmark-cuda.yml | 22 ++++++++++++++++---- .github/workflows/torchao.yml | 2 ++ userbenchmark/torchao/run.py | 23 ++++++++++++++------- 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml index 8a8fee78cb..2e113af619 100644 --- a/.github/workflows/_linux-benchmark-cuda.yml +++ b/.github/workflows/_linux-benchmark-cuda.yml @@ -10,6 +10,11 @@ on: required: true type: string description: Userbenchmark run command line arguments + only: + required: False + type: string + default: '' + description: Only run the selected model, used for testing secrets: HUGGING_FACE_HUB_TOKEN: required: false @@ -26,6 +31,7 @@ jobs: env: OUTPUT_DIR: '.userbenchmark' HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + ONLY: ${{ inputs.only || '' }} steps: - name: Checkout TorchBench uses: actions/checkout@v3 @@ -60,17 +66,25 @@ jobs: working-directory: benchmark run: | set -eux - ${CONDA_RUN} python install.py --numpy --models resnet18 + + if [[ -z "${ONLY}" ]]; then + ${CONDA_RUN} python install.py --numpy + else + ${CONDA_RUN} python install.py --numpy --models "${ONLY}" + fi - name: Run benchmark shell: bash working-directory: benchmark run: | set -eux - ${CONDA_RUN} python run_benchmark.py ${{ inputs.userbenchmark }} ${{ inputs.userbenchmark-run-args }} - # DEBUG - ls -laR "${OUTPUT_DIR}" + if [[ -z "${ONLY}" ]]; then + ${CONDA_RUN} python run_benchmark.py ${{ inputs.userbenchmark }} ${{ inputs.userbenchmark-run-args }} + else + ${CONDA_RUN} python run_benchmark.py ${{ inputs.userbenchmark }} ${{ inputs.userbenchmark-run-args }} \ + --only "${ONLY}" + fi - name: Upload the benchmark results to OSS benchmark database for the dashboard uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main diff --git a/.github/workflows/torchao.yml b/.github/workflows/torchao.yml index d6ac97bba0..670b5760b7 100644 --- a/.github/workflows/torchao.yml +++ b/.github/workflows/torchao.yml @@ -20,6 +20,8 @@ jobs: with: userbenchmark: torchao userbenchmark-run-args: "--ci --dashboard --torchbench" + # TODO (huydhn): Bring back the rest of them + only: BERT_pytorch secrets: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} huggingface: diff --git a/userbenchmark/torchao/run.py b/userbenchmark/torchao/run.py index 29d0a7fff5..f02e02f9cb 100644 --- a/userbenchmark/torchao/run.py +++ b/userbenchmark/torchao/run.py @@ -1,5 +1,7 @@ import argparse +import glob import itertools +from multiprocessing import Process from typing import List from userbenchmark.utils import get_output_dir @@ -29,15 +31,12 @@ def _get_ci_args( f"--{experiment}", "--output", f"{str(OUTPUT_DIR.joinpath(output_file_name).resolve())}", - "--only", # DEBUG: TO BE REMOVED - "resnet18", # DEBUG: TO BE REMOVED ] return ci_args def _get_full_ci_args(modelset: str) -> List[List[str]]: - # backends = ["autoquant", "int8dynamic", "int8weightonly", "noquant"] - backends = ["autoquant"] # DEBUG: TO BE REMOVED + backends = ["autoquant", "int8dynamic", "int8weightonly", "noquant"] modelset = [modelset] dtype = ["bfloat16"] mode = ["inference"] @@ -92,11 +91,19 @@ def run(args: List[str]): raise RuntimeError( "CI mode must run with --timm, --huggingface, or --torchbench" ) + for params in benchmark_args: + params.extend(pt2_args) else: benchmark_args = [pt2_args] - output_files = [_run_pt2_args(args) for args in benchmark_args] + for params in benchmark_args: + # TODO (huydhn): Figure out why it crashes when running in the same process + p = Process(target=_run_pt2_args, args=(params,)) + p.start() + p.join() + # Post-processing - if args.dashboard: - post_ci_process(output_files) - print("\n".join(output_files)) + for file in glob.glob(f"{OUTPUT_DIR}/*.csv", recursive=True): + print(file) + if args.dashboard: + post_ci_process(file) From aceee44b9aa6fb0ba2f862654e16fd9343a9956c Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 17 Dec 2024 23:11:32 -0800 Subject: [PATCH 27/31] Use schema v3 --- userbenchmark/torchao/run.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/userbenchmark/torchao/run.py b/userbenchmark/torchao/run.py index f02e02f9cb..8d139eae69 100644 --- a/userbenchmark/torchao/run.py +++ b/userbenchmark/torchao/run.py @@ -101,9 +101,3 @@ def run(args: List[str]): p = Process(target=_run_pt2_args, args=(params,)) p.start() p.join() - - # Post-processing - for file in glob.glob(f"{OUTPUT_DIR}/*.csv", recursive=True): - print(file) - if args.dashboard: - post_ci_process(file) From 9835c3fc569cf6660e5a4f36a116d1500c0e9023 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 17 Dec 2024 23:25:52 -0800 Subject: [PATCH 28/31] Wrong output dir --- .github/workflows/_linux-benchmark-cuda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml index 2e113af619..5843ea975a 100644 --- a/.github/workflows/_linux-benchmark-cuda.yml +++ b/.github/workflows/_linux-benchmark-cuda.yml @@ -89,7 +89,7 @@ jobs: - name: Upload the benchmark results to OSS benchmark database for the dashboard uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main with: - benchmark-results-dir: ${{ env.OUTPUT_DIR }} + benchmark-results-dir: benchmark/${{ env.OUTPUT_DIR }} dry-run: false schema-version: v3 github-token: ${{ secrets.GITHUB_TOKEN }} From bd3a3c36edd8bdf72cdd191c4bce5fd81e220414 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 17 Dec 2024 23:53:21 -0800 Subject: [PATCH 29/31] Is the path correct? --- .github/workflows/_linux-benchmark-cuda.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_linux-benchmark-cuda.yml b/.github/workflows/_linux-benchmark-cuda.yml index 5843ea975a..3491456da7 100644 --- a/.github/workflows/_linux-benchmark-cuda.yml +++ b/.github/workflows/_linux-benchmark-cuda.yml @@ -29,7 +29,7 @@ jobs: timeout-minutes: 1440 environment: docker-s3-upload env: - OUTPUT_DIR: '.userbenchmark' + OUTPUT_DIR: .userbenchmark HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} ONLY: ${{ inputs.only || '' }} steps: @@ -89,7 +89,7 @@ jobs: - name: Upload the benchmark results to OSS benchmark database for the dashboard uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main with: - benchmark-results-dir: benchmark/${{ env.OUTPUT_DIR }} + benchmark-results-dir: benchmark/${{ env.OUTPUT_DIR }}/${{ inputs.userbenchmark }} dry-run: false schema-version: v3 github-token: ${{ secrets.GITHUB_TOKEN }} From f83c6ce9d655e896430f454b9f1b79676f2fe36c Mon Sep 17 00:00:00 2001 From: Huy Do Date: Wed, 18 Dec 2024 00:17:04 -0800 Subject: [PATCH 30/31] Run some more models --- .github/workflows/torchao.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/torchao.yml b/.github/workflows/torchao.yml index 670b5760b7..299e26b2b4 100644 --- a/.github/workflows/torchao.yml +++ b/.github/workflows/torchao.yml @@ -4,15 +4,16 @@ on: pull_request: workflow_dispatch: schedule: - - cron: '00 18 * * *' # run at 6:00 PM UTC, K8s containers will roll out at 12PM EST + - cron: '00 18 * * *' # run at 6:00 PM UTC jobs: timm: - if: false uses: ./.github/workflows/_linux-benchmark-cuda.yml with: userbenchmark: torchao userbenchmark-run-args: "--ci --dashboard --timm" + # TODO (huydhn): Bring back the rest of them later + only: adv_inception_v3 secrets: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} torchbench: @@ -20,16 +21,17 @@ jobs: with: userbenchmark: torchao userbenchmark-run-args: "--ci --dashboard --torchbench" - # TODO (huydhn): Bring back the rest of them + # TODO (huydhn): Bring back the rest of them later only: BERT_pytorch secrets: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} huggingface: - if: false uses: ./.github/workflows/_linux-benchmark-cuda.yml with: userbenchmark: torchao userbenchmark-run-args: "--ci --dashboard --huggingface" + # TODO (huydhn): Bring back the rest of them later + only: AlbertForMaskedLM secrets: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} From 981d17f04ac4b34084fb400e30eb946853b54e3a Mon Sep 17 00:00:00 2001 From: Huy Do Date: Wed, 18 Dec 2024 15:49:29 -0800 Subject: [PATCH 31/31] Update torchao.yml to remove leftover debug codepath --- .github/workflows/torchao.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/torchao.yml b/.github/workflows/torchao.yml index 299e26b2b4..59b7b9f9a2 100644 --- a/.github/workflows/torchao.yml +++ b/.github/workflows/torchao.yml @@ -1,7 +1,5 @@ name: Torchao nightly workflow (A100) on: - # DEBUG - pull_request: workflow_dispatch: schedule: - cron: '00 18 * * *' # run at 6:00 PM UTC