diff --git a/BUILD b/BUILD
index 2578c6e..f287781 100644
--- a/BUILD
+++ b/BUILD
@@ -26,3 +26,9 @@ filegroup(
 # If you are looking into the implementation, start with the overview in ImplementationReadme.md.
 
 exports_files(["refresh.template.py", "check_python_version.template.py"]) # For implicit use by the refresh_compile_commands macro, not direct use.
+
+cc_binary(
+    name = "print_args",
+    srcs = ["print_args.cpp"],
+    visibility = ["//visibility:public"],
+)
diff --git a/print_args.cpp b/print_args.cpp
new file mode 100644
index 0000000..b672329
--- /dev/null
+++ b/print_args.cpp
@@ -0,0 +1,13 @@
+// Prints the arguments passed to this program, one per line, between begin/end marker lines.
+
+#include <iostream>
+
+int main(int argc, char *argv[]) {
+  std::cout << "===HEDRON_COMPILE_COMMANDS_BEGIN_ARGS===\n";
+  for (int i = 1; i < argc; ++i) {
+    std::cout << argv[i] << "\n";
+  }
+  std::cout << "===HEDRON_COMPILE_COMMANDS_END_ARGS===\n";
+  // We purposely return a non-zero exit code to have the emcc process exit after running this fake clang wrapper.
+  return 1;
+}
diff --git a/refresh.template.py b/refresh.template.py
index 0dd4404..70f962a 100644
--- a/refresh.template.py
+++ b/refresh.template.py
@@ -283,17 +283,6 @@ def _get_headers_gcc(compile_args: typing.List[str], source_path: str, action_ke
     return headers, should_cache
 
 
-@functools.lru_cache(maxsize=None)
-def _get_clang_or_gcc():
-    """Returns clang or gcc, if you have one of them on your path."""
-    if shutil.which('clang'):
-        return 'clang'
-    elif shutil.which('gcc'):
-        return 'gcc'
-    else:
-        return None
-
-
 def windows_list2cmdline(seq):
     """
     Copied from list2cmdline in https://github.com/python/cpython/blob/main/Lib/subprocess.py because we need it but it's not exported as part of the public API.
@@ -567,18 +556,7 @@ def _get_headers(compile_action, source_path: str):
     if compile_action.arguments[0].endswith('cl.exe'): # cl.exe and also clang-cl.exe
         headers, should_cache = _get_headers_msvc(compile_action.arguments, source_path)
     else:
-        # Emscripten is tricky.
There isn't an easy way to make it emcc run without lots of environment variables.
-        # So...rather than doing our usual script unwrapping, we just swap in clang/gcc and use that to get headers, knowing that they'll accept the same argument format.
-        # You can unwrap emcc.sh to emcc.py via next(pathlib.Path('external').glob('emscripten_bin_*/emscripten/emcc.py')).as_posix()
-        # But then the underlying emcc needs a configuration file that itself depends on lots of environment variables.
-        # If we ever pick this back up, note that you can supply that config via compile_args += ["--em-config", "external/emsdk/emscripten_toolchain/emscripten_config"]
-        args = compile_action.arguments
-        if args[0].endswith('emcc.sh') or args[0].endswith('emcc.bat'):
-            alternate_compiler = _get_clang_or_gcc()
-            if not alternate_compiler: return set() # Skip getting headers.
-            args = args.copy()
-            args[0] = alternate_compiler
-        headers, should_cache = _get_headers_gcc(args, source_path, compile_action.actionKey)
+        headers, should_cache = _get_headers_gcc(compile_action.arguments, source_path, compile_action.actionKey)
 
     # Cache for future use
     if output_file and should_cache:
@@ -767,6 +745,54 @@ def _apple_platform_patch(compile_args: typing.List[str]):
     return compile_args
 
 
+def _emscripten_platform_patch(compile_action):
+    """De-Bazel the command into something clangd can parse.
+
+    This function has fixes specific to Emscripten platforms, but you should call it on all platforms. It'll determine whether the fixes should be applied or not. For emcc actions, it runs the emcc driver with a fake compiler wrapper and returns the clang arguments that wrapper prints; otherwise, it returns the action's arguments unchanged. Raises RuntimeError if the wrapper's output can't be found in emcc's stdout.
+    """
+    emcc_driver = pathlib.Path(compile_action.arguments[0])
+    if not emcc_driver.name.startswith('emcc'):
+        return compile_action.arguments
+
+    workspace_absolute = pathlib.PurePath(os.environ["BUILD_WORKSPACE_DIRECTORY"])
+
+    environment = compile_action.environmentVariables.copy()
+    environment['EXT_BUILD_ROOT'] = str(workspace_absolute)
+    environment['EMCC_SKIP_SANITY_CHECK'] = '1'
+    environment['EM_COMPILER_WRAPPER'] = str(pathlib.PurePath({print_args_executable}))
+    if 'PATH' not in environment:
+        environment['PATH'] = os.environ['PATH']
+
+    # We run the emcc process with the environment variable EM_COMPILER_WRAPPER to intercept the command line arguments passed to `clang`.
+    emcc_process = subprocess.run(
+        # On Windows, it fails to spawn the subprocess when the path uses forward slashes as a separator.
+        # Here, we convert emcc driver path to use the native path separator.
+        [str(emcc_driver)] + compile_action.arguments[1:],
+        # MIN_PY=3.7: Replace PIPEs with capture_output.
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        env=environment,
+        encoding=locale.getpreferredencoding(),
+        check=False, # We explicitly ignore errors and carry on.
+    )
+
+    lines = emcc_process.stdout.splitlines()
+
+    # Parse the arguments from the output of the emcc process. Both markers must be present; otherwise we fall through to the error below.
+    if BEGIN_ARGS_MARKER in lines and END_ARGS_MARKER in lines:
+        begin_args_idx = lines.index(BEGIN_ARGS_MARKER)
+        end_args_idx = lines.index(END_ARGS_MARKER, begin_args_idx + 1)
+        args = lines[begin_args_idx + 1:end_args_idx]
+        clang_driver = pathlib.PurePath(args[0])
+        if _is_relative_to(clang_driver, workspace_absolute):
+            args[0] = clang_driver.relative_to(workspace_absolute).as_posix()
+        return args
+
+    raise RuntimeError(f'Failed to parse emcc output: {emcc_process.stderr}') # Not an assert: asserts are stripped under `python -O`, which would make this silently return None.
+BEGIN_ARGS_MARKER = '===HEDRON_COMPILE_COMMANDS_BEGIN_ARGS==='
+END_ARGS_MARKER = '===HEDRON_COMPILE_COMMANDS_END_ARGS==='
+
+
 def _all_platform_patch(compile_args: typing.List[str]):
     """Apply de-Bazeling fixes to the compile command that are shared across target platforms."""
     # clangd writes module cache files to the wrong place
@@ -1023,10 +1049,14 @@ def _get_cpp_command_for_files(compile_action):
 
     Undo Bazel-isms and figures out which files clangd should apply the command to.
     """
-    # Patch command by platform
-    compile_action.arguments = _all_platform_patch(compile_action.arguments)
+    # Condense aquery's environment variables into a dictionary, the format you might expect.
+    compile_action.environmentVariables = {pair.key: pair.value for pair in getattr(compile_action, 'environmentVariables', [])}
+
+    # Patch command by platform, revealing any hidden arguments.
     compile_action.arguments = _apple_platform_patch(compile_action.arguments)
+    compile_action.arguments = _emscripten_platform_patch(compile_action)
     # Android and Linux and grailbio LLVM toolchains: Fine as is; no special patching needed.
+    compile_action.arguments = _all_platform_patch(compile_action.arguments)
 
     source_files, header_files = _get_files(compile_action)
 
diff --git a/refresh_compile_commands.bzl b/refresh_compile_commands.bzl
index 537c9fc..e779c94 100644
--- a/refresh_compile_commands.bzl
+++ b/refresh_compile_commands.bzl
@@ -98,6 +98,7 @@ def refresh_compile_commands(
             version_checker_script_name,
             script_name,
         ],
+        data = ["@hedron_compile_commands//:print_args"], # Fake compiler wrapper that the refresh script points EM_COMPILER_WRAPPER at, to reveal the clang arguments hidden behind emcc.
         imports = [''], # Allows binary to import templated script, even if this macro is being called inside a sub package. See https://github.com/hedronvision/bazel-compile-commands-extractor/issues/137
         **kwargs
     )
@@ -115,6 +116,7 @@ def _expand_template_impl(ctx):
             " {windows_default_include_paths}": "\n".join([" %r," % path for path in find_cpp_toolchain(ctx).built_in_include_directories]), # find_cpp_toolchain is from https://docs.bazel.build/versions/main/integrating-with-rules-cc.html
             "{exclude_headers}": repr(ctx.attr.exclude_headers),
             "{exclude_external_sources}": repr(ctx.attr.exclude_external_sources),
+            "{print_args_executable}": repr(ctx.executable._print_args_executable.path),
         },
     )
     return DefaultInfo(files = depset([script]))
@@ -125,6 +127,7 @@ _expand_template = rule(
         "exclude_external_sources": attr.bool(default = False),
         "exclude_headers": attr.string(values = ["all", "external", ""]), # "" needed only for compatibility with Bazel < 3.6.0
         "_script_template": attr.label(allow_single_file = True, default = "refresh.template.py"),
+        "_print_args_executable": attr.label(executable = True, cfg = "target", default = "//:print_args"), # Its path is substituted into the script template as {print_args_executable}.
         # For Windows INCLUDE. If this were eliminated, for example by the resolution of https://github.com/clangd/clangd/issues/123, we'd be able to just use a macro and skylib's expand_template rule: https://github.com/bazelbuild/bazel-skylib/pull/330
         # Once https://github.com/bazelbuild/bazel/pull/17108 is widely released, we should be able to eliminate this and get INCLUDE directly. Perhaps for 7.0? Should be released in the sucessor to 6.0
         "_cc_toolchain": attr.label(default = "@bazel_tools//tools/cpp:current_cc_toolchain"),