diff --git a/docker/dist-accuracy-test.sh b/docker/dist-accuracy-test.sh new file mode 100644 index 0000000000..8214a00086 --- /dev/null +++ b/docker/dist-accuracy-test.sh @@ -0,0 +1,21 @@ +#!/bin/bash +set -o nounset +set -o errexit +set -o pipefail +set -e + +post=${1:-""} + +# fetch bazel executable +BAZEL_VERSION=4.2.1 +ARCH=$(uname -m) +if [[ "$ARCH" == "aarch64" ]]; then ARCH="arm64"; fi +wget -q https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-linux-${ARCH} -O /usr/bin/bazel +chmod a+x /usr/bin/bazel +export NVIDIA_TF32_OVERRIDE=0 + +cd /opt/pytorch/torch_tensorrt +cp /opt/pytorch/torch_tensorrt/docker/WORKSPACE.docker /opt/pytorch/torch_tensorrt/WORKSPACE + +pip install --user --upgrade nox +nox diff --git a/noxfile.py b/noxfile.py new file mode 100644 index 0000000000..99cb45b9c2 --- /dev/null +++ b/noxfile.py @@ -0,0 +1,132 @@ +import nox +import os + +# Use system installed Python packages +PYT_PATH='/opt/conda/lib/python3.8/site-packages' + +# Root directory for torch_tensorrt. 
Set according to docker container by default TOP_DIR='/opt/pytorch/torch_tensorrt' + +# Download the dataset +@nox.session(python=["3"], reuse_venv=True) +def download_datasets(session): + session.chdir(os.path.join(TOP_DIR, 'examples/int8/training/vgg16')) + session.run_always('wget', 'https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz') + session.run_always('tar', '-xvzf', 'cifar-10-binary.tar.gz') + session.run_always('mkdir', '-p', + os.path.join(TOP_DIR, 'tests/accuracy/datasets/data')) + session.run_always('cp', '-rpf', + os.path.join(TOP_DIR, 'examples/int8/training/vgg16/cifar-10-batches-bin'), + os.path.join(TOP_DIR, 'tests/accuracy/datasets/data/cifar-10-batches-bin'), + external=True) + +# Download the model +@nox.session(python=["3"], reuse_venv=True) +def download_models(session): + session.install('timm') + session.chdir('tests/modules') + session.run_always('python', + 'hub.py', + env={'PYTHONPATH': PYT_PATH}) + +# Train the model +@nox.session(python=["3"], reuse_venv=True) +def train_model(session): + session.chdir(os.path.join(TOP_DIR, 'examples/int8/training/vgg16')) + session.run_always('python', + 'main.py', + '--lr', '0.01', + '--batch-size', '128', + '--drop-ratio', '0.15', + '--ckpt-dir', 'vgg16_ckpts', + '--epochs', '25', + env={'PYTHONPATH': PYT_PATH}) + + # Export model + session.run_always('python', + 'export_ckpt.py', + 'vgg16_ckpts/ckpt_epoch25.pth', + env={'PYTHONPATH': PYT_PATH}) + +# Finetune the model +@nox.session(python=["3"], reuse_venv=True) +def finetune_model(session): + # Install pytorch-quantization dependency + session.install('pytorch-quantization', '--extra-index-url', 'https://pypi.ngc.nvidia.com') + + session.chdir(os.path.join(TOP_DIR, 'examples/int8/training/vgg16')) + session.run_always('python', + 'finetune_qat.py', + '--lr', '0.01', + '--batch-size', '128', + '--drop-ratio', '0.15', + '--ckpt-dir', 'vgg16_ckpts', + '--start-from', '25', + '--epochs', '26', + env={'PYTHONPATH': PYT_PATH}) + + # Export model + 
session.run_always('python', + 'export_qat.py', + 'vgg16_ckpts/ckpt_epoch26.pth', + env={'PYTHONPATH': PYT_PATH}) + +# Run PTQ tests +@nox.session(python=["3"], reuse_venv=True) +def ptq_test(session): + session.chdir(os.path.join(TOP_DIR, 'tests/py')) + session.run_always('cp', '-rf', + os.path.join(TOP_DIR, 'examples/int8/training/vgg16', 'trained_vgg16.jit.pt'), + '.', + external=True) + tests = [ + 'test_ptq_dataloader_calibrator.py', + 'test_ptq_to_backend.py', + 'test_ptq_trt_calibrator.py' + ] + for test in tests: + session.run_always('python', test, + env={'PYTHONPATH': PYT_PATH}) + +# Run QAT tests +@nox.session(python=["3"], reuse_venv=True) +def qat_test(session): + session.chdir(os.path.join(TOP_DIR, 'tests/py')) + session.run_always('cp', '-rf', + os.path.join(TOP_DIR, 'examples/int8/training/vgg16', 'trained_vgg16_qat.jit.pt'), + '.', + external=True) + + session.run_always('python', + 'test_qat_trt_accuracy.py', + env={'PYTHONPATH': PYT_PATH}) + +# Run Python API tests +@nox.session(python=["3"], reuse_venv=True) +def api_test(session): + session.chdir(os.path.join(TOP_DIR, 'tests/py')) + tests = [ + "test_api.py", + "test_to_backend_api.py" + ] + for test in tests: + session.run_always('python', + test, + env={'PYTHONPATH': PYT_PATH}) + +# Clean up +@nox.session(reuse_venv=True) +def cleanup(session): + target = [ + 'examples/int8/training/vgg16/*.jit.pt', + 'examples/int8/training/vgg16/vgg16_ckpts', + 'examples/int8/training/vgg16/cifar-10-*', + 'examples/int8/training/vgg16/data', + 'tests/modules/*.jit.pt', + 'tests/py/*.jit.pt' + ] + + target = ' '.join(x for x in [os.path.join(TOP_DIR, i) for i in target]) + session.run_always('bash', '-c', + str('rm -rf ') + target, + external=True) \ No newline at end of file diff --git a/tests/README.md b/tests/README.md index 2d1a9c493a..d9fca5a02b 100644 --- a/tests/README.md +++ b/tests/README.md @@ -1,6 +1,9 @@ # Tests -Right now there are two types of tests. 
Converter level tests and Module level tests. +Currently, the following tests are supported: +1. Converter level tests +2. Module level tests +3. Accuracy tests The goal of Converter tests are to tests individual converters againsts specific subgraphs. The current tests in `core/conveters` are good examples on how to write these tests. In general every converter should have at least 1 test. More may be required if the operation has switches that change the behavior of the op. @@ -20,6 +23,24 @@ bazel test //tests --compilation_mode=dbg --test_output=errors --jobs=4 --runs_p `--jobs=4` is useful and is sometimes required to prevent too many processes to use GPU memory and cause CUDA out of memory issues. +Additionally, accuracy tests are supported for Python backend using Nox. Please refer to [dist-accuracy-test.sh](../docker/dist-accuracy-test.sh) for reference. +``` +# To run complete Python accuracy + API tests +nox + +nox -l +``` + +Note: Supported Python tests +``` +* download_datasets-3 +* download_models-3 +* train_model-3 +* finetune_model-3 +* ptq_test-3 +* qat_test-3 +* cleanup +``` ### Testing using pre-built Torch-TensorRT library Currently, the default strategy when we run all the tests (`bazel test //tests`) is to build the testing scripts along with the full Torch-TensorRT library (`libtorchtrt.so`) from scratch. This can lead to increased testing time and might not be needed incase you already have a pre-built Torch-TensorRT library that you want to link against.