Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Fix Windows GPU CI #17962

Merged
merged 4 commits into from
Apr 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 42 additions & 26 deletions ci/build_windows.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,15 @@
import zipfile
from distutils.dir_util import copy_tree
from enum import Enum
from subprocess import check_call
from subprocess import check_call, call

from util import *

KNOWN_VCVARS = {
# https://gitlab.kitware.com/cmake/cmake/issues/18920
'VS 2015': r'C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\x86_amd64\vcvarsx86_amd64.bat',
'VS 2017': r'C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsx86_amd64.bat'
'VS 2017': r'C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsx86_amd64.bat',
'VS 2019': r'C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat',
}


Expand All @@ -54,6 +56,8 @@ class BuildFlavour(Enum):

CMAKE_FLAGS = {
'WIN_CPU': (
'-DCMAKE_C_COMPILER=cl '
'-DCMAKE_CXX_COMPILER=cl '
'-DUSE_CUDA=OFF '
'-DUSE_CUDNN=OFF '
'-DENABLE_CUDA_RTC=OFF '
Expand All @@ -67,6 +71,8 @@ class BuildFlavour(Enum):
'-DCMAKE_BUILD_TYPE=Release')

, 'WIN_CPU_MKLDNN': (
'-DCMAKE_C_COMPILER=cl '
'-DCMAKE_CXX_COMPILER=cl '
'-DUSE_CUDA=OFF '
'-DUSE_CUDNN=OFF '
'-DENABLE_CUDA_RTC=OFF '
Expand All @@ -80,6 +86,8 @@ class BuildFlavour(Enum):
'-DCMAKE_BUILD_TYPE=Release')

, 'WIN_CPU_MKLDNN_MKL': (
'-DCMAKE_C_COMPILER=cl '
'-DCMAKE_CXX_COMPILER=cl '
'-DUSE_CUDA=OFF '
'-DUSE_CUDNN=OFF '
'-DENABLE_CUDA_RTC=OFF '
Expand All @@ -93,6 +101,8 @@ class BuildFlavour(Enum):
'-DCMAKE_BUILD_TYPE=Release')

, 'WIN_CPU_MKL': (
'-DCMAKE_C_COMPILER=cl '
'-DCMAKE_CXX_COMPILER=cl '
'-DUSE_CUDA=OFF '
'-DUSE_CUDNN=OFF '
'-DENABLE_CUDA_RTC=OFF '
Expand All @@ -106,6 +116,8 @@ class BuildFlavour(Enum):
'-DCMAKE_BUILD_TYPE=Release')

, 'WIN_GPU': (
'-DCMAKE_C_COMPILER=cl '
'-DCMAKE_CXX_COMPILER=cl '
'-DUSE_CUDA=ON '
'-DUSE_CUDNN=ON '
'-DENABLE_CUDA_RTC=ON '
Expand All @@ -120,6 +132,8 @@ class BuildFlavour(Enum):
'-DCMAKE_BUILD_TYPE=Release')

, 'WIN_GPU_MKLDNN': (
'-DCMAKE_C_COMPILER=cl '
'-DCMAKE_CXX_COMPILER=cl '
'-DUSE_CUDA=ON '
'-DUSE_CUDNN=ON '
'-DENABLE_CUDA_RTC=ON '
Expand All @@ -140,38 +154,40 @@ def windows_build(args):
logging.info("Using vcvars environment:\n{}".format(args.vcvars))

path = args.output
os.makedirs(path, exist_ok=True)

mxnet_root = get_mxnet_root()
logging.info("Found MXNet root: {}".format(mxnet_root))
# cuda thrust + VS 2019 is flaky: try multiple times if fail
MAXIMUM_TRY = 5
build_try = 0

while build_try < MAXIMUM_TRY:
if os.path.exists(path):
shutil.rmtree(path)
os.makedirs(path, exist_ok=True)

url = 'https://github.com/Kitware/CMake/releases/download/v3.16.1/cmake-3.16.1-win64-x64.zip'
with tempfile.TemporaryDirectory() as tmpdir:
cmake_file_path = download_file(url, tmpdir)
with zipfile.ZipFile(cmake_file_path, 'r') as zip_ref:
# Create $tmpdir\cmake-3.16.1-win64-x64\bin\cmake.exe
zip_ref.extractall(tmpdir)
mxnet_root = get_mxnet_root()
logging.info("Found MXNet root: {}".format(mxnet_root))

with remember_cwd():
os.chdir(path)
cmd = "\"{}\" && {} -G \"NMake Makefiles JOM\" {} {}".format(
args.vcvars,
os.path.join(tmpdir, 'cmake-3.16.1-win64-x64', 'bin', 'cmake.exe'),
CMAKE_FLAGS[args.flavour], mxnet_root)
cmd = "\"{}\" && cmake -GNinja {} {}".format(args.vcvars,
CMAKE_FLAGS[args.flavour],
mxnet_root)
logging.info("Generating project with CMake:\n{}".format(cmd))
check_call(cmd, shell=True)

cmd = "\"{}\" && jom".format(args.vcvars)
logging.info("Building with jom:\n{}".format(cmd))
cmd = "\"{}\" && ninja".format(args.vcvars)
logging.info("Building:\n{}".format(cmd))

t0 = int(time.time())
check_call(cmd, shell=True)

logging.info(
"Build flavour: {} complete in directory: \"{}\"".format(
args.flavour, os.path.abspath(path)))
logging.info("Build took {}".format(
datetime.timedelta(seconds=int(time.time() - t0))))
ret = call(cmd, shell=True)

if ret != 0:
build_try += 1
logging.info("{} build(s) have failed".format(build_try))
else:
logging.info("Build flavour: {} complete in directory: \"{}\"".format(args.flavour, os.path.abspath(path)))
logging.info("Build took {}".format(datetime.timedelta(seconds=int(time.time() - t0))))
break
windows_package(args)


Expand Down Expand Up @@ -233,7 +249,7 @@ def main():

parser.add_argument("--vcvars",
help="vcvars batch file location, typically inside vs studio install dir",
default=KNOWN_VCVARS['VS 2015'],
default=KNOWN_VCVARS['VS 2019'],
type=str)

parser.add_argument("--arch",
Expand All @@ -258,7 +274,7 @@ def main():
if 'OpenCV_DIR' not in os.environ:
os.environ["OpenCV_DIR"] = "C:\\Program Files\\OpenCV-v3.4.1\\build"
if 'CUDA_PATH' not in os.environ:
os.environ["CUDA_PATH"] = "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.2"
os.environ["CUDA_PATH"] = "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please revert the CUDA change. That does not represent a fix.

Copy link
Contributor Author

@leezu leezu Apr 2, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a requirement for VS2019

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Usually visual studio does allow more than one cuda version. You just have to install the respective Toolkit.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you mean? Cuda 9.2 does not support VS 2019

Copy link
Contributor Author

@leezu leezu Apr 3, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, they intentionally broke that backwards compatibility feature. Usually it was possible to compile older cuda versions in later vs versions by installing toolkits which make sure that the integration is available. Seems like that caused issues and thus Microsoft and Nvidia decided to not go that route any further.

In that case, find to proceed.

But is there still some kind of compatibility mode which checks that the is still compliant with older cuda standards?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But is there still some kind of compatibility mode which checks that the is still compliant with older cuda standards?

No. Unix & CentOS tests Cuda 10.1. Windows now tests Cuda 10.2.
But the risk is low that we'd break Cuda 9 support within the next few days. So it's not a one-way door decision. I suggest we discuss on dev if we want to support cuda 9. If we decide to support it, let's switch the CentOS tests to use Cuda 9.

if 'MKL_ROOT' not in os.environ:
os.environ["MKL_ROOT"] = "C:\\Program Files (x86)\\IntelSWTools\\compilers_and_libraries\\windows\\mkl"
windows_build(args)
Expand Down