Skip to content

Commit

Permalink
Add smoke tests conv,linalg,compile. And better version check. (#1333)
Browse files Browse the repository at this point in the history
* Add smoke tests conv,linalg,compile

* Add version check

* Fix typo

Fix version check

Add not

* Add exception for python 3.11

* fix typo

* Try to exit after CUDA Runtime exception

* Restrict crash test only to conda

* Restrict crash test only to conda

* Fix tests

* Turn off cuda runtime issue

* tests

* more tests

* test

* remove compile step

* test

* disable some of the tests

* testing

* Remove extra index url

* test

* Fix tests

* Additional smoke tests

Remove release blocking changes
  • Loading branch information
atalman authored Mar 7, 2023
1 parent 7d4c980 commit 28508a3
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 13 deletions.
10 changes: 7 additions & 3 deletions .github/scripts/validate_binaries.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ else
conda env remove -n ${ENV_NAME}
else



# Special case Pypi installation package, only applicable to linux nightly CUDA 11.7 builds, wheel package
if [[ ${TARGET_OS} == 'linux' && ${MATRIX_CHANNEL} == 'nightly' && ${MATRIX_GPU_ARCH_VERSION} == '11.7' && ${MATRIX_PACKAGE_TYPE} == 'manywheel' ]]; then
conda create -yp ${ENV_NAME}_pypi python=${MATRIX_PYTHON_VERSION} numpy
if [[ ${TARGET_OS} == 'linux' && ${MATRIX_GPU_ARCH_VERSION} == '11.7' && ${MATRIX_PACKAGE_TYPE} == 'manywheel' ]]; then
conda create -yp ${ENV_NAME}_pypi python=${MATRIX_PYTHON_VERSION} numpy ffmpeg
INSTALLATION_PYPI=${MATRIX_INSTALLATION/"cu117"/"cu117_pypi_cudnn"}
INSTALLATION_PYPI=${INSTALLATION_PYPI/"torchvision torchaudio"/""}
INSTALLATION_PYPI=${INSTALLATION_PYPI/"index-url"/"extra-index-url"}
Expand All @@ -29,9 +31,11 @@ else
conda env remove -p ${ENV_NAME}_pypi
fi

conda create -y -n ${ENV_NAME} python=${MATRIX_PYTHON_VERSION} numpy pillow
# Please note ffmpeg is required for torchaudio, see https://github.com/pytorch/pytorch/issues/96159
conda create -y -n ${ENV_NAME} python=${MATRIX_PYTHON_VERSION} numpy ffmpeg
conda activate ${ENV_NAME}
INSTALLATION=${MATRIX_INSTALLATION/"conda install"/"conda install -y"}
INSTALLATION=${INSTALLATION/"extra-index-url"/"index-url"}
eval $INSTALLATION

if [[ ${TARGET_OS} == 'linux' ]]; then
Expand Down
75 changes: 65 additions & 10 deletions test/smoke_test/smoke_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@

gpu_arch_ver = os.getenv("MATRIX_GPU_ARCH_VERSION")
gpu_arch_type = os.getenv("MATRIX_GPU_ARCH_TYPE")
# use installation env variable to tell if it is nightly channel
installation_str = os.getenv("MATRIX_INSTALLATION")
channel = os.getenv("MATRIX_CHANNEL")
stable_version = os.getenv("MATRIX_STABLE_VERSION")
package_type = os.getenv("MATRIX_PACKAGE_TYPE")

is_cuda_system = gpu_arch_type == "cuda"
SCRIPT_DIR = Path(__file__).parent
NIGHTLY_ALLOWED_DELTA = 3
Expand All @@ -31,6 +33,16 @@
},
]

def check_version(package: str) -> None:
    """Verify that the installed torch build matches the channel under test.

    On the nightly channel the check is delegated to
    ``check_nightly_binaries_date``. On any other channel
    ``torch.__version__`` must start with ``stable_version``.

    Args:
        package: Package selector forwarded to the nightly date check.

    Raises:
        RuntimeError: If a non-nightly build does not start with the
            expected stable version.
    """
    # only makes sense to check nightly package where dates are known
    if channel == "nightly":
        # Forward the argument: ``options`` is local to ``main`` and is not
        # visible here, so referencing it would raise NameError.
        check_nightly_binaries_date(package)
        return

    if not torch.__version__.startswith(stable_version):
        raise RuntimeError(
            f"Torch version mismatch, expected {stable_version} for channel {channel}. But its {torch.__version__}"
        )

def check_nightly_binaries_date(package: str) -> None:
from datetime import datetime, timedelta
format_dt = '%Y%m%d'
Expand Down Expand Up @@ -58,6 +70,7 @@ def check_nightly_binaries_date(package: str) -> None:
def test_cuda_runtime_errors_captured() -> None:
cuda_exception_missed=True
try:
print("Testing test_cuda_runtime_errors_captured")
torch._assert_async(torch.tensor(0, device="cuda"))
torch._assert_async(torch.tensor(0 + 0j, device="cuda"))
except RuntimeError as e:
Expand Down Expand Up @@ -95,29 +108,73 @@ def smoke_test_cuda(package: str) -> None:
print(f"torch cudnn: {torch.backends.cudnn.version()}")
print(f"cuDNN enabled? {torch.backends.cudnn.enabled}")

# This check has to be run last, since its messing up CUDA runtime
# torch.compile is available only on Linux and python 3.8-3.10
if (sys.platform == "linux" or sys.platform == "linux2") and sys.version_info < (3, 11, 0):
smoke_test_compile()

test_cuda_runtime_errors_captured()


# Smoke test for nn.Conv2d: builds several Conv2d modules and runs a forward
# pass on a random CPU tensor; when is_cuda_system is true it also runs a small
# conv under CUDA autocast and the dilated conv for float16/float32/float64.
# Outputs are never checked, so only a raised exception signals failure.
# NOTE(review): this block appears to contain interleaved old/new diff lines
# (two prints, both the `m` and `basic_conv` variants, two forward passes) and
# has lost its indentation -- confirm which lines are intended before editing.
def smoke_test_conv2d() -> None:
import torch.nn as nn

print("Calling smoke_test_conv2d")
print("Testing smoke_test_conv2d")
# With square kernels and equal stride
m = nn.Conv2d(16, 33, 3, stride=2)
# non-square kernels and unequal stride and with padding
m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
# non-square kernels and unequal stride and with padding and dilation
m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
basic_conv = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
# Random CPU input with 16 channels, matching the in_channels of the convs above
input = torch.randn(20, 16, 50, 100)
output = m(input)
output = basic_conv(input)

# GPU coverage only runs when MATRIX_GPU_ARCH_TYPE reported "cuda"
if is_cuda_system:
print("Testing smoke_test_conv2d with cuda")
conv = nn.Conv2d(3, 3, 3).cuda()
x = torch.randn(1, 3, 24, 24).cuda()
# Forward pass inside the CUDA autocast context
with torch.cuda.amp.autocast():
out = conv(x)

# Repeat the dilated conv on the GPU once per dtype; module and input are
# both converted to the same dtype before the forward pass
supported_dtypes = [torch.float16, torch.float32, torch.float64]
for dtype in supported_dtypes:
print(f"Testing smoke_test_conv2d with cuda for {dtype}")
conv = basic_conv.to(dtype).cuda()
input = torch.randn(20, 16, 50, 100, device="cuda").type(dtype)
output = conv(input)

def smoke_test_linalg() -> None:
    """Exercise ``torch.linalg.svd`` on CPU and, on CUDA systems, on the GPU.

    The decompositions and reconstruction distances are computed but not
    checked; only a raised exception signals failure.
    """
    print("Testing smoke_test_linalg")

    # Single 5x3 matrix, decomposed with full_matrices=False
    matrix = torch.randn(5, 3)
    left, sigma, right_h = torch.linalg.svd(matrix, full_matrices=False)
    torch.dist(matrix, left @ torch.diag(sigma) @ right_h)

    # Same matrix with default arguments; only the first three columns of U
    # are used for the reconstruction
    left, sigma, right_h = torch.linalg.svd(matrix)
    torch.dist(matrix, left[:, :3] @ torch.diag(sigma) @ right_h)

    # Batched input: diag_embed builds one diagonal matrix per batch entry
    batch = torch.randn(7, 5, 3)
    left, sigma, right_h = torch.linalg.svd(batch, full_matrices=False)
    torch.dist(batch, left @ torch.diag_embed(sigma) @ right_h)

    if not is_cuda_system:
        return

    for dtype in (torch.float32, torch.float64):
        print(f"Testing smoke_test_linalg with cuda for {dtype}")
        gpu_input = torch.randn(20, 16, 50, 100, device="cuda").type(dtype)
        torch.linalg.svd(gpu_input)

def smoke_test_compile() -> None:
    """Run a small function eagerly and through ``torch.compile`` on CUDA.

    For each dtype the eager and compiled outputs are compared with
    ``torch.allclose`` and the resulting boolean is printed; a mismatch is
    reported but does not raise.
    """
    def foo(x: torch.Tensor) -> torch.Tensor:
        return torch.sin(x) + torch.cos(x)

    for dtype in (torch.float16, torch.float32, torch.float64):
        print(f"Testing smoke_test_compile for {dtype}")
        sample = torch.rand(3, 3, device="cuda").type(dtype)
        # Eager result first, then the compiled one, on the same input
        eager_out = foo(sample)
        compiled_foo = torch.compile(foo)
        compiled_out = compiled_foo(sample)
        outputs_match = torch.allclose(eager_out, compiled_out)
        print(outputs_match)


def smoke_test_modules():
for module in MODULES:
Expand Down Expand Up @@ -146,15 +203,13 @@ def main() -> None:
)
options = parser.parse_args()
print(f"torch: {torch.__version__}")
check_version(options.package)
smoke_test_conv2d()
smoke_test_linalg()

if options.package == "all":
smoke_test_modules()

# only makes sense to check nightly package where dates are known
if installation_str.find("nightly") != -1:
check_nightly_binaries_date(options.package)

smoke_test_cuda(options.package)


Expand Down

0 comments on commit 28508a3

Please sign in to comment.