From fe8b97bdd9c522ac9324b72b9523640f99cd4bcb Mon Sep 17 00:00:00 2001 From: gs-olive <113141689+gs-olive@users.noreply.github.com> Date: Wed, 1 Nov 2023 20:11:39 -0700 Subject: [PATCH 1/2] fix: Segfault fix for Benchmarks - Segfault fix for benchmarking on Docker container with CUDNN 8.8 - Likely due to Torch 2.1.0 based on CUDNN 8.9 --- tools/perf/perf_run.py | 7 ++----- tools/perf/utils.py | 4 ---- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/tools/perf/perf_run.py b/tools/perf/perf_run.py index d4d57c6d3f..58d03de4d1 100644 --- a/tools/perf/perf_run.py +++ b/tools/perf/perf_run.py @@ -7,6 +7,7 @@ import time import timeit import warnings +from functools import wraps import numpy as np import pandas as pd @@ -14,7 +15,6 @@ # Importing supported Backends import torch -import torch.backends.cudnn as cudnn from utils import ( BENCHMARK_MODELS, parse_backends, @@ -30,6 +30,7 @@ def run_with_try_except(func): + @wraps(func) def wrapper_func(*args, **kwargs): try: return func(*args, **kwargs) @@ -527,7 +528,6 @@ def recordStats(backend, timings, precision, batch_size=1, compile_time_s=None): ) args = arg_parser.parse_args() - cudnn.benchmark = True # Create random input tensor of certain size torch.manual_seed(12345) model_name = "Model" @@ -542,9 +542,6 @@ def recordStats(backend, timings, precision, batch_size=1, compile_time_s=None): if os.path.exists(model_name): print("Loading user provided torchscript model: ", model_name) model = torch.jit.load(model_name).cuda().eval() - elif model_name in BENCHMARK_MODELS: - print("Loading torchscript model from BENCHMARK_MODELS for: ", model_name) - model = BENCHMARK_MODELS[model_name]["model"].eval().cuda() # Load PyTorch Model, if provided if len(model_name_torch) > 0 and os.path.exists(model_name_torch): diff --git a/tools/perf/utils.py b/tools/perf/utils.py index 41f49439a2..a6f8ba236d 100644 --- a/tools/perf/utils.py +++ b/tools/perf/utils.py @@ -1,12 +1,8 @@ -from typing import Optional, Sequence, Union - import custom_models as cm import timm import torch import torchvision.models as models -import torch_tensorrt - BENCHMARK_MODEL_NAMES = { "vgg16", "alexnet", From 23bb89321025525366fa1836cc25c3d8af314b9d Mon Sep 17 00:00:00 2001 From: gs-olive <113141689+gs-olive@users.noreply.github.com> Date: Thu, 2 Nov 2023 11:49:04 -0700 Subject: [PATCH 2/2] fix: Upgrade CudNN to 8.9 --- .circleci/config.yml | 12 ++++++------ README.md | 2 +- WORKSPACE | 6 +++--- dev_dep_versions.yml | 2 +- docker/README.md | 4 ++-- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index d1e36447d3..2aad8ee43f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -109,7 +109,7 @@ commands: sudo docker run --rm --runtime=nvidia --gpus all nvidia/cuda:11.6.2-base-ubuntu20.04 nvidia-smi install-cudnn: - description: "Install CUDNN 8.8.1" + description: "Install CUDNN 8.9.5" parameters: os: type: string @@ -119,7 +119,7 @@ commands: default: "x86_64" cudnn-version: type: string - default: "8.8.1.3" + default: "8.9.5.30" cuda-version: type: string default: "cuda12.0" @@ -198,7 +198,7 @@ commands: default: "cuda12.0" cudnn-version: type: string - default: "8.8.1.3" + default: "8.9.5.30" trt-version-short: type: string default: "8.6.1" @@ -246,7 +246,7 @@ commands: default: "8.6.1" cudnn-version-long: type: string - default: "8.8.1.3" + default: "8.9.5.30" steps: - run: name: Set up python environment @@ -1460,7 +1460,7 @@ parameters: default: "https://download.pytorch.org/whl/nightly/cu121" cudnn-version: type: string - default: "8.8.1.3" + default: "8.9.5.30" trt-version-short: type: string default: "8.6.1" @@ -1483,7 +1483,7 @@ parameters: default: "https://download.pytorch.org/whl/cu117" cudnn-version-legacy: type: string - default: "8.8.1.3" + default: "8.9.5.30" trt-version-short-legacy: type: string default: "8.6.1" diff --git a/README.md b/README.md index 44cc7c83e0..0ca48347fc 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ These are the following dependencies used to verify the testcases. Torch-TensorR - Bazel 5.2.0 - Libtorch 2.2.0.dev (latest nightly) (built with CUDA 12.1) - CUDA 12.1 -- cuDNN 8.8.1 +- cuDNN 8.9.5 - TensorRT 8.6.1 ## Prebuilt Binaries and Wheel files diff --git a/WORKSPACE b/WORKSPACE index b24384dfe8..bbc1803296 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -71,10 +71,10 @@ http_archive( http_archive( name = "cudnn", build_file = "@//third_party/cudnn/archive:BUILD", - sha256 = "79d77a769c7e7175abc7b5c2ed5c494148c0618a864138722c887f95c623777c", - strip_prefix = "cudnn-linux-x86_64-8.8.1.3_cuda12-archive", + sha256 = "2a2eb89a2ab51071151c6082f1e816c702167a711a9372f9f73a7b5c4b06e01a", + strip_prefix = "cudnn-linux-x86_64-8.9.5.30_cuda12-archive", urls = [ - "https://developer.nvidia.com/downloads/compute/cudnn/secure/8.8.1/local_installers/12.0/cudnn-linux-x86_64-8.8.1.3_cuda12-archive.tar.xz", + "https://developer.nvidia.com/downloads/compute/cudnn/secure/8.9.5/local_installers/12.x/cudnn-linux-x86_64-8.9.5.30_cuda12-archive.tar.xz", ], ) diff --git a/dev_dep_versions.yml b/dev_dep_versions.yml index 874a27cbcd..4249dc9b8b 100644 --- a/dev_dep_versions.yml +++ b/dev_dep_versions.yml @@ -1,4 +1,4 @@ __version__: "2.2.0.dev0" __cuda_version__: "12.1" -__cudnn_version__: "8.8" +__cudnn_version__: "8.9" __tensorrt_version__: "8.6" diff --git a/docker/README.md b/docker/README.md index 527b7ae2b2..9f83f25134 100644 --- a/docker/README.md +++ b/docker/README.md @@ -17,14 +17,14 @@ Note: By default the container uses the `pre-cxx11-abi` version of Torch + Torch ### Instructions -- The example below uses CUDNN 8.8 and TensorRT 8.6 +- The example below uses CUDNN 8.9 and TensorRT 8.6 - See dependencies for a list of current default dependencies. > From root of Torch-TensorRT repo Build: ``` -DOCKER_BUILDKIT=1 docker build --build-arg TENSORRT_VERSION=8.6 --build-arg CUDNN_VERSION=8.8 -f docker/Dockerfile -t torch_tensorrt:latest . +DOCKER_BUILDKIT=1 docker build --build-arg TENSORRT_VERSION=8.6 --build-arg CUDNN_VERSION=8.9 -f docker/Dockerfile -t torch_tensorrt:latest . ``` Run: