From 8b784e7008d4143f22f56d32290eb909d9fa5904 Mon Sep 17 00:00:00 2001 From: Olexa Bilaniuk Date: Mon, 15 Feb 2021 00:55:22 -0500 Subject: [PATCH 1/6] CUDA Toolkit 11.2.1 has been released, update version table Strangely, the minimum version of CUDA Toolkit 11.2.0 has also been updated - downwards. We pick up this change as well. --- mesonbuild/modules/unstable_cuda.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mesonbuild/modules/unstable_cuda.py b/mesonbuild/modules/unstable_cuda.py index 0a5f03102447..33df0bdd1cb4 100644 --- a/mesonbuild/modules/unstable_cuda.py +++ b/mesonbuild/modules/unstable_cuda.py @@ -43,7 +43,8 @@ def min_driver_version(self, state, args, kwargs): cuda_version = args[0] driver_version_table = [ - {'cuda_version': '>=11.2.0', 'windows': '460.89', 'linux': '460.27.04'}, + {'cuda_version': '>=11.2.1', 'windows': '461.09', 'linux': '460.32.03'}, + {'cuda_version': '>=11.2.0', 'windows': '460.82', 'linux': '460.27.03'}, {'cuda_version': '>=11.1.1', 'windows': '456.81', 'linux': '455.32'}, {'cuda_version': '>=11.1.0', 'windows': '456.38', 'linux': '455.23'}, {'cuda_version': '>=11.0.3', 'windows': '451.82', 'linux': '450.51.06'}, From a2530373c742478f379793d541d9450599c4793c Mon Sep 17 00:00:00 2001 From: Olexa Bilaniuk Date: Mon, 15 Feb 2021 00:56:01 -0500 Subject: [PATCH 2/6] Add default debug flags for two configurations to NVCC. 
--- mesonbuild/compilers/compilers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mesonbuild/compilers/compilers.py b/mesonbuild/compilers/compilers.py index 1872253ca451..8171758f5e6b 100644 --- a/mesonbuild/compilers/compilers.py +++ b/mesonbuild/compilers/compilers.py @@ -175,8 +175,8 @@ class CompileCheckMode(enum.Enum): cuda_buildtype_args = {'plain': [], - 'debug': [], - 'debugoptimized': [], + 'debug': ['-g', '-G'], + 'debugoptimized': ['-g', '-lineinfo'], 'release': [], 'minsize': [], 'custom': [], From 3669be30aa69daff96d01f272433d6793182bd1d Mon Sep 17 00:00:00 2001 From: Olexa Bilaniuk Date: Mon, 15 Feb 2021 08:15:38 -0500 Subject: [PATCH 3/6] Extensive rewrite of GCC/MSVC flag translation to NVCC flags. --- mesonbuild/compilers/cuda.py | 344 +++++++++++++++++++++++++++++++---- 1 file changed, 313 insertions(+), 31 deletions(-) diff --git a/mesonbuild/compilers/cuda.py b/mesonbuild/compilers/cuda.py index 7fa3e4f87bc1..fe2d048a0fe9 100644 --- a/mesonbuild/compilers/cuda.py +++ b/mesonbuild/compilers/cuda.py @@ -45,7 +45,131 @@ class CudaCompiler(Compiler): LINKER_PREFIX = '-Xlinker=' language = 'cuda' - _universal_flags = {_Phase.COMPILER: ['-I', '-D', '-U', '-E'], _Phase.LINKER: ['-l', '-L']} # type: T.Dict[_Phase, T.List[str]] + # NVCC flags taking no arguments. 
+ _FLAG_PASSTHRU_NOARGS = { + # NVCC --long-option, NVCC -short-option CUDA Toolkit 11.2.1 Reference + '--objdir-as-tempdir', '-objtemp', # 4.2.1.2 + '--generate-dependency-targets', '-MP', # 4.2.1.12 + '--allow-unsupported-compiler', '-allow-unsupported-compiler', # 4.2.1.14 + '--link', # 4.2.2.1 + '--lib', '-lib', # 4.2.2.2 + '--device-link', '-dlink', # 4.2.2.3 + '--device-c', '-dc', # 4.2.2.4 + '--device-w', '-dw', # 4.2.2.5 + '--cuda', '-cuda', # 4.2.2.6 + '--compile', '-c', # 4.2.2.7 + '--fatbin', '-fatbin', # 4.2.2.8 + '--cubin', '-cubin', # 4.2.2.9 + '--ptx', '-ptx', # 4.2.2.10 + '--preprocess', '-E', # 4.2.2.11 + '--generate-dependencies', '-M', # 4.2.2.12 + '--generate-nonsystem-dependencies', '-MM', # 4.2.2.13 + '--generate-dependencies-with-compile', '-MD', # 4.2.2.14 + '--generate-nonsystem-dependencies-with-compile', '-MMD', # 4.2.2.15 + '--run', # 4.2.2.16 + '--profile', '-pg', # 4.2.3.1 + '--debug', '-g', # 4.2.3.2 + '--device-debug', '-G', # 4.2.3.3 + '--extensible-whole-program', '-ewp', # 4.2.3.4 + '--generate-line-info', '-lineinfo', # 4.2.3.5 + '--dlink-time-opt', '-dlto', # 4.2.3.8 + '--no-exceptions', '-noeh', # 4.2.3.11 + '--shared', '-shared', # 4.2.3.12 + '--no-host-device-initializer-list', '-nohdinitlist', # 4.2.3.15 + '--expt-relaxed-constexpr', '-expt-relaxed-constexpr', # 4.2.3.16 + '--extended-lambda', '-extended-lambda', # 4.2.3.17 + '--expt-extended-lambda', '-expt-extended-lambda', # 4.2.3.18 + '--m32', '-m32', # 4.2.3.20 + '--m64', '-m64', # 4.2.3.21 + '--forward-unknown-to-host-compiler', '-forward-unknown-to-host-compiler', # 4.2.5.1 + '--forward-unknown-to-host-linker', '-forward-unknown-to-host-linker', # 4.2.5.2 + '--dont-use-profile', '-noprof', # 4.2.5.3 + '--dryrun', '-dryrun', # 4.2.5.5 + '--verbose', '-v', # 4.2.5.6 + '--keep', '-keep', # 4.2.5.7 + '--save-temps', '-save-temps', # 4.2.5.9 + '--clean-targets', '-clean', # 4.2.5.10 + '--no-align-double', # 4.2.5.16 + '--no-device-link', '-nodlink', # 4.2.5.17 + 
'--allow-unsupported-compiler', '-allow-unsupported-compiler', # 4.2.5.18 + '--use_fast_math', '-use_fast_math', # 4.2.7.7 + '--extra-device-vectorization', '-extra-device-vectorization', # 4.2.7.12 + '--compile-as-tools-patch', '-astoolspatch', # 4.2.7.13 + '--keep-device-functions', '-keep-device-functions', # 4.2.7.14 + '--disable-warnings', '-w', # 4.2.8.1 + '--source-in-ptx', '-src-in-ptx', # 4.2.8.2 + '--restrict', '-restrict', # 4.2.8.3 + '--Wno-deprecated-gpu-targets', '-Wno-deprecated-gpu-targets', # 4.2.8.4 + '--Wno-deprecated-declarations', '-Wno-deprecated-declarations', # 4.2.8.5 + '--Wreorder', '-Wreorder', # 4.2.8.6 + '--Wdefault-stream-launch', '-Wdefault-stream-launch', # 4.2.8.7 + '--Wext-lambda-captures-this', '-Wext-lambda-captures-this', # 4.2.8.8 + '--display-error-number', '-err-no', # 4.2.8.10 + '--resource-usage', '-res-usage', # 4.2.8.14 + '--help', '-h', # 4.2.8.15 + '--version', '-V', # 4.2.8.16 + '--list-gpu-code', '-code-ls', # 4.2.8.20 + '--list-gpu-arch', '-arch-ls', # 4.2.8.21 + } + # Dictionary of NVCC flags taking either one argument or a comma-separated list. + # Maps --long to -short options, because the short options are more GCC-like. 
+ _FLAG_LONG2SHORT_WITHARGS = { + '--output-file': '-o', # 4.2.1.1 + '--pre-include': '-include', # 4.2.1.3 + '--library': '-l', # 4.2.1.4 + '--define-macro': '-D', # 4.2.1.5 + '--undefine-macro': '-U', # 4.2.1.6 + '--include-path': '-I', # 4.2.1.7 + '--system-include': '-isystem', # 4.2.1.8 + '--library-path': '-L', # 4.2.1.9 + '--output-directory': '-odir', # 4.2.1.10 + '--dependency-output': '-MF', # 4.2.1.11 + '--compiler-bindir': '-ccbin', # 4.2.1.13 + '--archiver-binary': '-arbin', # 4.2.1.15 + '--cudart': '-cudart', # 4.2.1.16 + '--cudadevrt': '-cudadevrt', # 4.2.1.17 + '--libdevice-directory': '-ldir', # 4.2.1.18 + '--target-directory': '-target-dir', # 4.2.1.19 + '--optimization-info': '-opt-info', # 4.2.3.6 + '--optimize': '-O', # 4.2.3.7 + '--ftemplate-backtrace-limit': '-ftemplate-backtrace-limit', # 4.2.3.9 + '--ftemplate-depth': '-ftemplate-depth', # 4.2.3.10 + '--x': '-x', # 4.2.3.13 + '--std': '-std', # 4.2.3.14 + '--machine': '-m', # 4.2.3.19 + '--compiler-options': '-Xcompiler', # 4.2.4.1 + '--linker-options': '-Xlinker', # 4.2.4.2 + '--archive-options': '-Xarchive', # 4.2.4.3 + '--ptxas-options': '-Xptxas', # 4.2.4.4 + '--nvlink-options': '-Xnvlink', # 4.2.4.5 + '--threads': '-t', # 4.2.5.4 + '--keep-dir': '-keep-dir', # 4.2.5.8 + '--run-args': '-run-args', # 4.2.5.11 + '--input-drive-prefix': '-idp', # 4.2.5.12 + '--dependency-drive-prefix': '-ddp', # 4.2.5.13 + '--drive-prefix': '-dp', # 4.2.5.14 + '--dependency-target-name': '-MT', # 4.2.5.15 + '--default-stream': '-default-stream', # 4.2.6.1 + '--gpu-architecture': '-arch', # 4.2.7.1 + '--gpu-code': '-code', # 4.2.7.2 + '--generate-code': '-gencode', # 4.2.7.3 + '--relocatable-device-code': '-rdc', # 4.2.7.4 + '--entries': '-e', # 4.2.7.5 + '--maxrregcount': '-maxrregcount', # 4.2.7.6 + '--ftz': '-ftz', # 4.2.7.8 + '--prec-div': '-prec-div', # 4.2.7.9 + '--prec-sqrt': '-prec-sqrt', # 4.2.7.10 + '--fmad': '-fmad', # 4.2.7.11 + '--Werror': '-Werror', # 4.2.8.9 + '--diag-error': '-diag-error', # 
4.2.8.11 + '--diag-suppress': '-diag-suppress', # 4.2.8.12 + '--diag-warn': '-diag-warn', # 4.2.8.13 + '--options-file': '-optf', # 4.2.8.17 + '--time': '-time', # 4.2.8.18 + '--qpp-config': '-qpp-config', # 4.2.8.19 + } + # Reverse map -short to --long options. + _FLAG_SHORT2LONG_WITHARGS = {v:k for k,v in _FLAG_LONG2SHORT_WITHARGS.items()} def __init__(self, exelist: T.List[str], version: str, for_machine: MachineChoice, is_cross: bool, exe_wrapper: T.Optional['ExternalProgram'], @@ -61,20 +185,189 @@ def __init__(self, exelist: T.List[str], version: str, for_machine: MachineChoic @classmethod def _to_host_flags(cls, flags: T.List[str], phase: _Phase = _Phase.COMPILER) -> T.List[str]: - return [cls._to_host_flag(f, phase=phase) for f in flags] - - @classmethod - def _to_host_flag(cls, flag: str, phase: _Phase) -> str: - if not flag[0] in ['-', '/'] or flag[:2] in cls._universal_flags[phase]: - return flag + """ + Translate generic "GCC-speak" plus particular "NVCC-speak" flags to NVCC flags. + + NVCC's "short" flags have broad similarities to the GCC standard, but have + gratuitous, irritating differences. + """ + + xflags = [] + flagit = iter(flags) + + for flag in flagit: + # The CUDA Toolkit Documentation, in 4.1. Command Option Types and Notation, + # specifies that NVCC does not parse the standard flags as GCC does. It has + # its own strategy, to wit: + # + # nvcc recognizes three types of command options: boolean options, single + # value options, and list options. + # + # Boolean options do not have an argument; they are either specified on a + # command line or not. Single value options must be specified at most once, + # and list options may be repeated. Examples of each of these option types + # are, respectively: --verbose (switch to verbose mode), --output-file + # (specify output file), and --include-path (specify include path). 
+ # + # Single value options and list options must have arguments, which must + # follow the name of the option itself by either one of more spaces or an + # equals character. When a one-character short name such as -I, -l, and -L + # is used, the value of the option may also immediately follow the option + # itself without being seperated by spaces or an equal character. The + # individual values of list options may be separated by commas in a single + # instance of the option, or the option may be repeated, or any + # combination of these two cases. + # + # One strange consequence of this choice is that directory and filenames that + # contain commas (',') cannot be passed to NVCC (at least, not as easily as + # in GCC). Another strange consequence is that it is legal to supply flags + # such as + # + # -lpthread,rt,dl,util + # -l pthread,rt,dl,util + # -l=pthread,rt,dl,util + # + # and each of the above alternatives is equivalent to GCC-speak + # + # -lpthread -lrt -ldl -lutil + # -l pthread -l rt -l dl -l util + # -l=pthread -l=rt -l=dl -l=util + # + # *With the exception of commas in the name*, GCC-speak for these list flags + # is a strict subset of NVCC-speak, so we passthrough those flags. + # + # The -D macro-define flag is documented as somehow shielding commas from + # splitting a definition. Balanced parentheses, braces and single-quotes + # around the comma are not sufficient, but balanced double-quotes are. The + # shielding appears to work with -l, -I, -L flags as well, for instance. + # + # Since our goal is to replicate GCC-speak as much as possible, we check for + # commas in all list-arguments and shield them with double-quotes. We make + # an exception for -D (where this would be value-changing) and -U (because + # it isn't possible to define a macro with a comma in the name). + + if flag in cls._FLAG_PASSTHRU_NOARGS: + xflags.append(flag) + continue + + + # Handle breakup of flag-values into a flag-part and value-part. 
+ if flag[:1] not in '-/': + # This is not a flag. It's probably a file input. Pass it through. + xflags.append(flag) + continue + elif flag[:1] == '/': + # This is ambiguously either an MSVC-style /switch or an absolute path + # to a file. For some magical reason the following works acceptably in + # both cases. + wrap = '"' if ',' in flag else '' + xflags.append(f'-X{phase.value}={wrap}{flag}{wrap}') + continue + elif len(flag) >= 2 and flag[0] == '-' and flag[1] in 'IDULlmOxmte': + # This is a single-letter short option. These options (with the + # exception of -o) are allowed to receive their argument with neither + # space nor = sign before them. Detect and separate them in that event. + if flag[2:3] == '': # -I something + try: + val = next(flagit) + except StopIteration: + pass + elif flag[2:3] == '=': # -I=something + val = flag[3:] + else: # -Isomething + val = flag[2:] + flag = flag[:2] # -I + elif flag in cls._FLAG_LONG2SHORT_WITHARGS or \ + flag in cls._FLAG_SHORT2LONG_WITHARGS: + # This is either -o or a multi-letter flag, and it is receiving its + # value isolated. + try: + val = next(flagit) # -o something + except StopIteration: + pass + elif flag.split('=',1)[0] in cls._FLAG_LONG2SHORT_WITHARGS or \ + flag.split('=',1)[0] in cls._FLAG_SHORT2LONG_WITHARGS: + # This is either -o or a multi-letter flag, and it is receiving its + # value after an = sign. + flag, val = flag.split('=',1) # -o=something + else: + # This is a flag, and it's foreign to NVCC. + # + # We do not know whether this GCC-speak flag takes an isolated + # argument. Assuming it does not (the vast majority indeed don't), + # wrap this argument in an -Xcompiler flag and send it down to NVCC. 
+ if flag == '-ffast-math': + xflags.append('-use_fast_math') + xflags.append('-Xcompiler') + xflags.append(flag) + elif flag == '-fno-fast-math': + xflags.append('-ftz=false') + xflags.append('-prec-div=true') + xflags.append('-prec-sqrt=true') + xflags.append('-Xcompiler') + xflags.append(flag) + elif flag == '-freciprocal-math': + xflags.append('-prec-div=false') + xflags.append('-Xcompiler') + xflags.append(flag) + elif flag == '-fno-reciprocal-math': + xflags.append('-prec-div=true') + xflags.append('-Xcompiler') + xflags.append(flag) + else: + xflags.append('-Xcompiler') + xflags.append(f'"{flag}"' if ',' in flag else flag) + # The above shields -Wl, -Wa, -Wp arguments against splitting. + continue + + + assert val is not None # Should only trip if there is a missing argument. + + + # Take care of the various NVCC-supported flags that need special handling. + flag = cls._FLAG_LONG2SHORT_WITHARGS.get(flag,flag) + + if flag in {'-include','-isystem','-I','-L','-l'}: + # These flags are known to GCC, but list-valued in NVCC. They potentially + # require double-quoting to prevent NVCC interpreting the flags as lists + # when GCC would not have done so. + # + # We avoid doing this quoting for -D to avoid redefining macros and for + # -U because it isn't possible to define a macro with a comma in the name. + # -U with comma arguments is impossible in GCC-speak (and thus unambiguous + #in NVCC-speak, albeit unportable). + if flag in {'-I','-L','-l'}: + xflags.append(flag+f'"{val}"' if ',' in val else flag+val) + else: + xflags.append(flag) + xflags.append(f'"{val}"' if ',' in val else val) + elif flag == '-O': + # Handle optimization levels GCC knows about that NVCC does not. 
+ if val == 'fast': + xflags.append('-O3') + xflags.append('-use_fast_math') + xflags.append('-Xcompiler') + xflags.append(flag+val) + elif val in {'s', 'g', 'z'}: + xflags.append('-Xcompiler') + xflags.append(flag+val) + else: + xflags.append(flag+val) + elif flag in {'-D', '-U', '-m', '-t'}: + xflags.append(flag+val) # For style, keep glued. + elif flag in {'-std'}: + xflags.append(flag+'='+val) # For style, keep glued. + else: + xflags.append(flag) + xflags.append(val) - return '-X{}={}'.format(phase.value, flag) + return xflags def needs_static_linker(self) -> bool: return False def thread_link_flags(self, environment: 'Environment') -> T.List[str]: - return self._to_host_flags(self.host_compiler.thread_link_flags(environment)) + return self._to_host_flags(self.host_compiler.thread_link_flags(environment), _Phase.LINKER) def sanity_check(self, work_dir: str, environment: 'Environment') -> None: mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist)) @@ -223,27 +516,16 @@ def get_option_compile_args(self, options: 'KeyedOptionDictType') -> T.List[str] return args + self._to_host_flags(self.host_compiler.get_option_compile_args(self._to_host_compiler_options(options))) - @classmethod - def _cook_link_args(cls, args: T.List[str]) -> T.List[str]: - # Prepare link args for nvcc - cooked = [] # type: T.List[str] - for arg in args: - if arg.startswith('-Wl,'): # strip GNU-style -Wl prefix - arg = arg.replace('-Wl,', '', 1) - arg = arg.replace(' ', '\\') # espace whitespace - cooked.append(arg) - return cls._to_host_flags(cooked, _Phase.LINKER) - def get_option_link_args(self, options: 'KeyedOptionDictType') -> T.List[str]: - return self._cook_link_args(self.host_compiler.get_option_link_args(self._to_host_compiler_options(options))) + return self._to_host_flags(self.host_compiler.get_option_link_args(self._to_host_compiler_options(options)), _Phase.LINKER) def get_soname_args(self, env: 'Environment', prefix: str, shlib_name: 
str, suffix: str, soversion: str, darwin_versions: T.Tuple[str, str], is_shared_module: bool) -> T.List[str]: - return self._cook_link_args(self.host_compiler.get_soname_args( + return self._to_host_flags(self.host_compiler.get_soname_args( env, prefix, shlib_name, suffix, soversion, darwin_versions, - is_shared_module)) + is_shared_module), _Phase.LINKER) def get_compile_only_args(self) -> T.List[str]: return ['-c'] @@ -275,26 +557,26 @@ def get_buildtype_args(self, buildtype: str) -> T.List[str]: def get_include_args(self, path: str, is_system: bool) -> T.List[str]: if path == '': path = '.' - return ['-I' + path] + return ['-isystem=' + path] if is_system else ['-I' + path] def get_compile_debugfile_args(self, rel_obj: str, pch: bool = False) -> T.List[str]: return self._to_host_flags(self.host_compiler.get_compile_debugfile_args(rel_obj, pch)) def get_link_debugfile_args(self, targetfile: str) -> T.List[str]: - return self._cook_link_args(self.host_compiler.get_link_debugfile_args(targetfile)) + return self._to_host_flags(self.host_compiler.get_link_debugfile_args(targetfile), _Phase.LINKER) def get_depfile_suffix(self) -> str: return 'd' def get_buildtype_linker_args(self, buildtype: str) -> T.List[str]: - return self._cook_link_args(self.host_compiler.get_buildtype_linker_args(buildtype)) + return self._to_host_flags(self.host_compiler.get_buildtype_linker_args(buildtype), _Phase.LINKER) def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str, rpath_paths: str, build_rpath: str, install_rpath: str) -> T.Tuple[T.List[str], T.Set[bytes]]: (rpath_args, rpath_dirs_to_remove) = self.host_compiler.build_rpath_args( env, build_dir, from_dir, rpath_paths, build_rpath, install_rpath) - return (self._cook_link_args(rpath_args), rpath_dirs_to_remove) + return (self._to_host_flags(rpath_args, _Phase.LINKER), rpath_dirs_to_remove) def linker_to_compiler_args(self, args: T.List[str]) -> T.List[str]: return args @@ -310,7 +592,7 @@ def 
get_output_args(self, target: str) -> T.List[str]: return ['-o', target] def get_std_exe_link_args(self) -> T.List[str]: - return self._cook_link_args(self.host_compiler.get_std_exe_link_args()) + return self._to_host_flags(self.host_compiler.get_std_exe_link_args(), _Phase.LINKER) def find_library(self, libname: str, env: 'Environment', extra_dirs: T.List[str], libtype: LibType = LibType.PREFER_SHARED) -> T.Optional[T.List[str]]: @@ -326,13 +608,13 @@ def get_crt_link_args(self, crt_val: str, buildtype: str) -> T.List[str]: host_crt_compile_args = self.host_compiler.get_crt_compile_args(crt_val, buildtype) if any(arg in ['/MDd', '/MD', '/MTd'] for arg in host_crt_compile_args): host_link_arg_overrides += ['/NODEFAULTLIB:LIBCMT.lib'] - return self._cook_link_args(host_link_arg_overrides + self.host_compiler.get_crt_link_args(crt_val, buildtype)) + return self._to_host_flags(host_link_arg_overrides + self.host_compiler.get_crt_link_args(crt_val, buildtype), _Phase.LINKER) def get_target_link_args(self, target: 'BuildTarget') -> T.List[str]: - return self._cook_link_args(super().get_target_link_args(target)) + return self._to_host_flags(super().get_target_link_args(target), _Phase.LINKER) def get_dependency_compile_args(self, dep: 'Dependency') -> T.List[str]: return self._to_host_flags(super().get_dependency_compile_args(dep)) def get_dependency_link_args(self, dep: 'Dependency') -> T.List[str]: - return self._cook_link_args(super().get_dependency_link_args(dep)) + return self._to_host_flags(super().get_dependency_link_args(dep), _Phase.LINKER) From cc09d984f1d4517d72846c691aef355c1dcfc752 Mon Sep 17 00:00:00 2001 From: Olexa Bilaniuk Date: Tue, 16 Feb 2021 06:45:18 -0500 Subject: [PATCH 4/6] Armour-grade quoting to account for NVCC's -Xcompiler peculiarities. 
--- mesonbuild/compilers/cuda.py | 98 ++++++++++++++++++++++++++++++------ 1 file changed, 84 insertions(+), 14 deletions(-) diff --git a/mesonbuild/compilers/cuda.py b/mesonbuild/compilers/cuda.py index fe2d048a0fe9..26984fea6e1b 100644 --- a/mesonbuild/compilers/cuda.py +++ b/mesonbuild/compilers/cuda.py @@ -14,6 +14,7 @@ import enum import os.path +import string import typing as T from .. import coredata @@ -183,6 +184,80 @@ def __init__(self, exelist: T.List[str], version: str, for_machine: MachineChoic self.id = 'nvcc' self.warn_args = {level: self._to_host_flags(flags) for level, flags in host_compiler.warn_args.items()} + @classmethod + def _shield_nvcc_list_arg(cls, arg: str, listmode: bool=True) -> str: + """ + Shield an argument against both splitting by NVCC's list-argument + parse logic, and interpretation by any shell. + + NVCC seems to consider every comma , that is neither escaped by \ nor inside + a double-quoted string a split-point. Single-quotes do not provide protection + against splitting; In fact, after splitting they are \-escaped. Unfortunately, + double-quotes don't protect against shell expansion. What follows is a + complex dance to accommodate everybody. + """ + + SQ = "'" + DQ = '"' + CM = "," + BS = "\\" + DQSQ = DQ+SQ+DQ + quotable = set(string.whitespace+'"$`\\') + + if CM not in arg or not listmode: + if SQ not in arg: + # If any of the special characters "$`\ or whitespace are present, single-quote. + # Otherwise return bare. + if set(arg).intersection(quotable): + return SQ+arg+SQ + else: + return arg # Easy case: no splits, no quoting. + else: + # There are single quotes. Double-quote them, and single-quote the + # strings between them. + l = [cls._shield_nvcc_list_arg(s) for s in arg.split(SQ)] + l = sum([[s, DQSQ] for s in l][:-1], []) # Interleave l with DQSQs + + # The list l now has the structure of shielded strings interleaved + # with double-quoted single-quotes. 
+ # + # Plain concatenation would result in the tripling of the length of + # a string made up only of single quotes. See if we can merge some + # DQSQs together first. + def isdqsq(x:str) -> bool: + return x.startswith(SQ) and x.endswith(SQ) and x[1:-1].strip(SQ) == '' + for i in range(1, len(l)-2, 2): + if isdqsq(l[i]) and l[i+1] == '' and isdqsq(l[i+2]): + l[i+2] = l[i][:-1]+l[i+2][1:] + l[i] = '' + + # With DQSQs merged, simply concatenate everything together and return. + return ''.join(l) + else: + # A comma is present, and list mode was active. + # We apply (what we guess is) the (primitive) NVCC splitting rule: + l = [''] + instring = False + argit = iter(arg) + for c in argit: + if c == CM and not instring: + l.append('') + elif c == DQ: + l[-1] += c + instring = not instring + elif c == BS: + try: + l[-1] += next(argit) + except StopIteration: + break + else: + l[-1] += c + + # Shield individual strings, without listmode, then return them with + # escaped commas between them. + l = [cls._shield_nvcc_list_arg(s, listmode=False) for s in l] + return '\,'.join(l) + @classmethod def _to_host_flags(cls, flags: T.List[str], phase: _Phase = _Phase.COMPILER) -> T.List[str]: """ @@ -298,26 +373,21 @@ def _to_host_flags(cls, flags: T.List[str], phase: _Phase = _Phase.COMPILER) -> # wrap this argument in an -Xcompiler flag and send it down to NVCC. 
if flag == '-ffast-math': xflags.append('-use_fast_math') - xflags.append('-Xcompiler') - xflags.append(flag) + xflags.append('-Xcompiler='+flag) elif flag == '-fno-fast-math': xflags.append('-ftz=false') xflags.append('-prec-div=true') xflags.append('-prec-sqrt=true') - xflags.append('-Xcompiler') - xflags.append(flag) + xflags.append('-Xcompiler='+flag) elif flag == '-freciprocal-math': xflags.append('-prec-div=false') - xflags.append('-Xcompiler') - xflags.append(flag) + xflags.append('-Xcompiler='+flag) elif flag == '-fno-reciprocal-math': xflags.append('-prec-div=true') - xflags.append('-Xcompiler') - xflags.append(flag) + xflags.append('-Xcompiler='+flag) else: - xflags.append('-Xcompiler') - xflags.append(f'"{flag}"' if ',' in flag else flag) - # The above shields -Wl, -Wa, -Wp arguments against splitting. + xflags.append('-Xcompiler='+cls._shield_nvcc_list_arg(flag)) + # The above should securely handle GCC's -Wl, -Wa, -Wp, arguments. continue @@ -336,11 +406,11 @@ def _to_host_flags(cls, flags: T.List[str], phase: _Phase = _Phase.COMPILER) -> # -U because it isn't possible to define a macro with a comma in the name. # -U with comma arguments is impossible in GCC-speak (and thus unambiguous #in NVCC-speak, albeit unportable). - if flag in {'-I','-L','-l'}: - xflags.append(flag+f'"{val}"' if ',' in val else flag+val) + if len(flag) == 2: + xflags.append(flag+cls._shield_nvcc_list_arg(val)) else: xflags.append(flag) - xflags.append(f'"{val}"' if ',' in val else val) + xflags.append(cls._shield_nvcc_list_arg(val)) elif flag == '-O': # Handle optimization levels GCC knows about that NVCC does not. if val == 'fast': From d1e945f442e8601eea0014ebb27cd57f10d52eec Mon Sep 17 00:00:00 2001 From: Olexa Bilaniuk Date: Tue, 16 Feb 2021 10:44:06 -0500 Subject: [PATCH 5/6] Add optional -Dcuda_ccbindir= option and -ccbin flag to CUDA compiler. Closes #8110. 
--- docs/markdown/Builtin-options.md | 1 + mesonbuild/compilers/cuda.py | 40 +++++++++++++++++++++++++------- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/docs/markdown/Builtin-options.md b/docs/markdown/Builtin-options.md index 2d7c01caeca8..e3805ac0049f 100644 --- a/docs/markdown/Builtin-options.md +++ b/docs/markdown/Builtin-options.md @@ -194,6 +194,7 @@ or compiler being used: | cpp_thread_count | 4 | integer value ≥ 0 | Number of threads to use with emcc when using threads | | cpp_winlibs | see below | free-form comma-separated list | Standard Windows libs to link against | | fortran_std | none | [none, legacy, f95, f2003, f2008, f2018] | Fortran language standard to use | +| cuda_ccbindir | | filesystem path | CUDA non-default toolchain directory to use (-ccbin) *(Added in 0.57.1)* | The default values of `c_winlibs` and `cpp_winlibs` are in compiler-specific argument forms, but the libraries are: kernel32, diff --git a/mesonbuild/compilers/cuda.py b/mesonbuild/compilers/cuda.py index 26984fea6e1b..4efe0c615cc2 100644 --- a/mesonbuild/compilers/cuda.py +++ b/mesonbuild/compilers/cuda.py @@ -439,7 +439,7 @@ def needs_static_linker(self) -> bool: def thread_link_flags(self, environment: 'Environment') -> T.List[str]: return self._to_host_flags(self.host_compiler.thread_link_flags(environment), _Phase.LINKER) - def sanity_check(self, work_dir: str, environment: 'Environment') -> None: + def sanity_check(self, work_dir: str, env: 'Environment') -> None: mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist)) mlog.debug('Is cross compiler: %s.' % str(self.is_cross)) @@ -483,7 +483,18 @@ def sanity_check(self, work_dir: str, environment: 'Environment') -> None: # environment set up properly. Of course, this only works for native # builds; For cross builds we must still use the exe_wrapper (if any). 
self.detected_cc = '' - flags = ['-w', '-cudart', 'static', source_name] + flags = [] + + # Disable warnings, compile with statically-linked runtime for minimum + # reliance on the system. + flags += ['-w', '-cudart', 'static', source_name] + + # Use the -ccbin option, if available, even during sanity checking. + # Otherwise, on systems where CUDA does not support the default compiler, + # NVCC becomes unusable. + flags += self.get_ccbin_args(env.coredata.options) + + # If cross-compiling, we can't run the sanity check, only compile it. if self.is_cross and self.exe_wrapper is None: # Linking cross built apps is painful. You can't really # tell if you should use -nostdlib or not and for example @@ -563,10 +574,14 @@ def has_header_symbol(self, hname: str, symbol: str, prefix: str, def get_options(self) -> 'KeyedOptionDictType': opts = super().get_options() - key = OptionKey('std', machine=self.for_machine, lang=self.language) - opts.update({key: coredata.UserComboOption('C++ language standard to use with cuda', - ['none', 'c++03', 'c++11', 'c++14'], - 'none')}) + std_key = OptionKey('std', machine=self.for_machine, lang=self.language) + ccbindir_key = OptionKey('ccbindir', machine=self.for_machine, lang=self.language) + opts.update({ + std_key: coredata.UserComboOption('C++ language standard to use with CUDA', + ['none', 'c++03', 'c++11', 'c++14', 'c++17'], 'none'), + ccbindir_key: coredata.UserStringOption('CUDA non-default toolchain directory to use (-ccbin)', + ''), + }) return opts def _to_host_compiler_options(self, options: 'KeyedOptionDictType') -> 'KeyedOptionDictType': @@ -574,7 +589,7 @@ def _to_host_compiler_options(self, options: 'KeyedOptionDictType') -> 'KeyedOpt return OptionOverrideProxy(overrides, self.host_compiler.get_options()) def get_option_compile_args(self, options: 'KeyedOptionDictType') -> T.List[str]: - args = [] + args = self.get_ccbin_args(options) # On Windows, the version of the C++ standard used by nvcc is dictated by # the 
combination of CUDA version and MSVC version; the --std= is thus ignored # and attempting to use it will result in a warning: https://stackoverflow.com/a/51272091/741027 @@ -587,7 +602,8 @@ def get_option_compile_args(self, options: 'KeyedOptionDictType') -> T.List[str] return args + self._to_host_flags(self.host_compiler.get_option_compile_args(self._to_host_compiler_options(options))) def get_option_link_args(self, options: 'KeyedOptionDictType') -> T.List[str]: - return self._to_host_flags(self.host_compiler.get_option_link_args(self._to_host_compiler_options(options)), _Phase.LINKER) + args = self.get_ccbin_args(options) + return args + self._to_host_flags(self.host_compiler.get_option_link_args(self._to_host_compiler_options(options)), _Phase.LINKER) def get_soname_args(self, env: 'Environment', prefix: str, shlib_name: str, suffix: str, soversion: str, @@ -688,3 +704,11 @@ def get_dependency_compile_args(self, dep: 'Dependency') -> T.List[str]: def get_dependency_link_args(self, dep: 'Dependency') -> T.List[str]: return self._to_host_flags(super().get_dependency_link_args(dep), _Phase.LINKER) + + def get_ccbin_args(self, options: 'KeyedOptionDictType') -> T.List[str]: + key = OptionKey('ccbindir', machine=self.for_machine, lang=self.language) + ccbindir = options[key].value + if isinstance(ccbindir, str) and ccbindir != '': + return [self._shield_nvcc_list_arg('-ccbin='+ccbindir, False)] + else: + return [] From 3410465178b471d57d8591561a5c4f45015aa782 Mon Sep 17 00:00:00 2001 From: Olexa Bilaniuk Date: Tue, 16 Feb 2021 17:01:24 -0500 Subject: [PATCH 6/6] Small patch to unittests to initialize CUDA compiler correctly. Suggested by Dylan Baker. 
--- run_unittests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_unittests.py b/run_unittests.py index 80833425ff81..0bd6d4bf9c61 100755 --- a/run_unittests.py +++ b/run_unittests.py @@ -3769,7 +3769,7 @@ def test_templates(self): env = get_fake_env() for l in ['cpp', 'cs', 'd', 'java', 'cuda', 'fortran', 'objc', 'objcpp', 'rust']: try: - comp = getattr(env, f'detect_{l}_compiler')(MachineChoice.HOST) + comp = env.detect_compiler_for(l, MachineChoice.HOST) with tempfile.TemporaryDirectory() as d: comp.sanity_check(d, env) langs.append(l)