From 5a2d420d3a5994e30dcb953167c901df86cbcc1b Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Fri, 26 Apr 2024 23:45:51 +0000 Subject: [PATCH 1/6] Revert "NVFUSER_DISTRIBUTED instead of USE_DISTRIBUTED (#1711)" This reverts commit 9ae6c767212a12e7b28be5627744afd5965296d6. --- CMakeLists.txt | 8 -------- csrc/multidevice/communication.cpp | 4 ++-- csrc/multidevice/communication.h | 2 +- csrc/multidevice/communicator.cpp | 8 ++++---- csrc/multidevice/communicator.h | 2 +- csrc/multidevice/utils.cpp | 2 +- setup.py | 17 +---------------- tools/gen_nvfuser_version.py | 16 ---------------- 8 files changed, 10 insertions(+), 49 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6ede7cb1219..001c3ad0950 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,13 +15,6 @@ set(NVFUSER_THIRD_PARTY_DIR "${NVFUSER_ROOT}/third_party") option(NVFUSER_STANDALONE_BUILD_WITH_UCC "" OFF) option(NVFUSER_BUILD_WITH_ASAN "Build nvFuser with asan" OFF) -include(CMakeDependentOption) -cmake_dependent_option(NVFUSER_DISTRIBUTED "" ON "USE_DISTRIBUTED" OFF) -if (NVFUSER_DISTRIBUTED) - add_compile_definitions(NVFUSER_DISTRIBUTED) -endif() -message(STATUS "Setting NVFUSER_DISTRIBUTED=${NVFUSER_DISTRIBUTED}") - # We try to update which C++ standard we use together in lockstep across all # built libraries, and these variables control which that is. Generally we are # on C++20, but we still support a version of CUDA (11) that does not recognize @@ -769,7 +762,6 @@ message(STATUS "******** Nvfuser configuration summary ********") message(STATUS " UCC_FOUND: ${UCC_FOUND}") message(STATUS " NVFUSER_STANDALONE_BUILD_WITH_UCC : ${NVFUSER_STANDALONE_BUILD_WITH_UCC}") message(STATUS " NVFUSER_BUILD_WITH_ASAN : ${NVFUSER_BUILD_WITH_ASAN}") -message(STATUS " NVFUSER_DISTRIBUTED : ${NVFUSER_DISTRIBUTED}") message(STATUS " NVFUSER_CPP_STANDARD : ${NVFUSER_CPP_STANDARD}") if(NVFUSER_STANDALONE_BUILD_WITH_UCC) diff --git a/csrc/multidevice/communication.cpp b/csrc/multidevice/communication.cpp index 4f4db1711ab..0882e9af335 100644 --- a/csrc/multidevice/communication.cpp +++ b/csrc/multidevice/communication.cpp @@ -6,7 +6,7 @@ */ // clang-format on #include -#if defined(NVFUSER_DISTRIBUTED) && defined(USE_C10D_NCCL) +#if defined(USE_DISTRIBUTED) && defined(USE_C10D_NCCL) #include #endif #include @@ -229,7 +229,7 @@ c10::intrusive_ptr Reduce::post( c10d::ReduceOptions options = { .reduceOp = params_.redOp, .rootRank = root_relative_index_}; auto team_backend = comm.getBackendForTeam(params_.team, backend); -#if defined(NVFUSER_DISTRIBUTED) && defined(USE_C10D_NCCL) +#if defined(USE_DISTRIBUTED) && defined(USE_C10D_NCCL) auto nccl_backend = dynamic_cast(team_backend.get()); if (nccl_backend) { #if NVF_TORCH_VERSION_NO_LESS(2, 3, 0) diff --git a/csrc/multidevice/communication.h b/csrc/multidevice/communication.h index 5c2e59f887d..77f7a4b6de4 100644 --- a/csrc/multidevice/communication.h +++ b/csrc/multidevice/communication.h @@ -9,7 +9,7 @@ #include #include -#ifdef NVFUSER_DISTRIBUTED +#ifdef USE_DISTRIBUTED #include #else #include diff --git a/csrc/multidevice/communicator.cpp b/csrc/multidevice/communicator.cpp index b113edd61d6..67766ce85d4 100644 --- a/csrc/multidevice/communicator.cpp +++ b/csrc/multidevice/communicator.cpp @@ -10,7 +10,7 @@ #include #include -#ifdef NVFUSER_DISTRIBUTED +#ifdef USE_DISTRIBUTED #include #ifdef USE_C10D_GLOO #include @@ -132,7 +132,7 @@ inline std::string getTeamKey(const Team& team, CommunicatorBackend backend) { }); } -#ifdef NVFUSER_DISTRIBUTED +#ifdef USE_DISTRIBUTED // creates 
and return a process group backend c10::intrusive_ptr createBackend( CommunicatorBackend backend, @@ -187,7 +187,7 @@ Communicator::Communicator( return; } -#ifdef NVFUSER_DISTRIBUTED +#ifdef USE_DISTRIBUTED c10d::TCPStoreOptions store_opts; { char hostname[HOST_NAME_MAX]; // NOLINT (modernize-avoid-c-arrays) @@ -222,7 +222,7 @@ c10::intrusive_ptr Communicator::getBackendForTeam( // check if backend associated with the team is present in the cache if (backends_.find(team_key) == backends_.end()) { // create the backend and cache it -#ifdef NVFUSER_DISTRIBUTED +#ifdef USE_DISTRIBUTED // check that the caller's rank belongs to the requested team auto rank_it = std::find(team.begin(), team.end(), deviceId()); NVF_ERROR( diff --git a/csrc/multidevice/communicator.h b/csrc/multidevice/communicator.h index 9666ec10cb9..3a8fc465a01 100644 --- a/csrc/multidevice/communicator.h +++ b/csrc/multidevice/communicator.h @@ -13,7 +13,7 @@ #include #include -#ifdef NVFUSER_DISTRIBUTED +#ifdef USE_DISTRIBUTED #include #include #include diff --git a/csrc/multidevice/utils.cpp b/csrc/multidevice/utils.cpp index 037862e41e0..12a0f4b9a7c 100644 --- a/csrc/multidevice/utils.cpp +++ b/csrc/multidevice/utils.cpp @@ -21,7 +21,7 @@ namespace nvfuser { NVF_API bool distributedEnabled() { -#ifdef NVFUSER_DISTRIBUTED +#ifdef USE_DISTRIBUTED return true; #else return false; diff --git a/setup.py b/setup.py index 5f5e47a8e03..3521035db1a 100644 --- a/setup.py +++ b/setup.py @@ -26,9 +26,6 @@ # --build-with-ucc # Build nvfuser with UCC support. You may need to specify environment variables of UCC_HOME, UCC_DIR, UCX_HOME, UCX_DIR. # -# --build-without-distributed -# Build nvfuser without multidevice support -# # --debug # Building nvfuser in debug mode # @@ -74,7 +71,6 @@ NO_NINJA = False BUILD_WITH_UCC = False BUILD_WITH_ASAN = False -BUILD_WITHOUT_DISTRIBUTED = False OVERWRITE_VERSION = False VERSION_TAG = None BUILD_TYPE = "Release" @@ -106,9 +102,6 @@ if arg == "--build-with-asan": BUILD_WITH_ASAN = True continue - if arg == "--build-without-distributed": - BUILD_WITHOUT_DISTRIBUTED = True - continue if arg == "--debug": BUILD_TYPE = "Debug" continue @@ -289,10 +282,7 @@ def cmake(install_prefix: str = "./nvfuser"): if not os.path.exists(cmake_build_dir): os.makedirs(cmake_build_dir) - from tools.gen_nvfuser_version import ( - get_pytorch_cmake_prefix, - get_pytorch_use_distributed, - ) + from tools.gen_nvfuser_version import get_pytorch_cmake_prefix # this is used to suppress import error. 
# so we can get the right pytorch prefix for cmake @@ -306,8 +296,6 @@ def cmake(install_prefix: str = "./nvfuser"): logger.setLevel(logger_level) - pytorch_use_distributed = get_pytorch_use_distributed() - # generate cmake directory cmd_str = [ get_cmake_bin(), @@ -315,7 +303,6 @@ def cmake(install_prefix: str = "./nvfuser"): "-DCMAKE_BUILD_TYPE=" + BUILD_TYPE, f"-DCMAKE_INSTALL_PREFIX={install_prefix}", f"-DNVFUSER_CPP_STANDARD={CPP_STANDARD}", - f"-DUSE_DISTRIBUTED={pytorch_use_distributed}", "-B", cmake_build_dir, ] @@ -333,8 +320,6 @@ def cmake(install_prefix: str = "./nvfuser"): cmd_str.append("-DBUILD_NVFUSER_BENCHMARK=ON") if BUILD_WITH_ASAN: cmd_str.append("-DNVFUSER_BUILD_WITH_ASAN=ON") - if BUILD_WITHOUT_DISTRIBUTED: - cmd_str.append("-DNVFUSER_DISTRIBUTED=OFF") cmd_str.append(".") print(f"Configuring CMake with {' '.join(cmd_str)}") diff --git a/tools/gen_nvfuser_version.py b/tools/gen_nvfuser_version.py index 789aa96d37a..7537ff3ad4a 100644 --- a/tools/gen_nvfuser_version.py +++ b/tools/gen_nvfuser_version.py @@ -45,22 +45,6 @@ def get_pytorch_cmake_prefix(): return stdout_msg.decode("utf-8").rstrip("\n") -def get_pytorch_use_distributed(): - from subprocess import Popen, PIPE - - # need to do this in a separate process so we are not going to delete nvfuser library while it's loaded by torch - process_torch_prefix = Popen( - [ - sys.executable, - "-c", - "import torch; print(torch._C._has_distributed())", - ], - stdout=PIPE, - ) - stdout_msg, error_msg = process_torch_prefix.communicate() - return stdout_msg.decode("utf-8").rstrip("\n") - - if __name__ == "__main__": version_file = nvfuser_root / "nvfuser" / "version.py" with open(version_file, "w") as f: From a5dd89b7f12fe168b1bc9ac0ed0c30381f98cebc Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Sat, 27 Apr 2024 02:04:43 +0000 Subject: [PATCH 2/6] Remove USE_DISTRIBUTED. --- csrc/multidevice/c10d_mock.h | 134 ------------------------ csrc/multidevice/communication.cpp | 4 +- csrc/multidevice/communication.h | 4 - csrc/multidevice/communicator.cpp | 10 -- csrc/multidevice/communicator.h | 31 +++--- csrc/multidevice/utils.cpp | 8 -- csrc/multidevice/utils.h | 3 - tests/cpp/test_multidevice_pipeline.cpp | 5 +- tests/cpp/test_resharding.cpp | 3 - 9 files changed, 16 insertions(+), 186 deletions(-) delete mode 100644 csrc/multidevice/c10d_mock.h diff --git a/csrc/multidevice/c10d_mock.h b/csrc/multidevice/c10d_mock.h deleted file mode 100644 index 58572507216..00000000000 --- a/csrc/multidevice/c10d_mock.h +++ /dev/null @@ -1,134 +0,0 @@ -// clang-format off -/* - * SPDX-FileCopyrightText: Copyright (c) 2024-present NVIDIA CORPORATION & AFFILIATES. - * All rights reserved. 
- * SPDX-License-Identifier: BSD-3-Clause - */ -// clang-format on -#pragma once - -#include -#include -#include - -namespace c10d { -class Work : public torch::CustomClassHolder { - public: - void wait() {} -}; - -struct ReduceOp : torch::CustomClassHolder { - enum RedOpType { - SUM, - AVG, - PRODUCT, - MIN, - MAX, - BAND, - BOR, - BXOR, - UNUSED, - }; - - ReduceOp() = default; - ReduceOp(RedOpType op) : op_(op) {} - - RedOpType op_ = UNUSED; -}; - -struct ReduceScatterOptions { - ReduceOp reduceOp = ReduceOp::UNUSED; -}; - -struct ScatterOptions { - int64_t rootRank = 0; -}; - -struct AllgatherOptions {}; - -struct GatherOptions { - int64_t rootRank = 0; -}; - -struct BroadcastOptions { - int64_t rootRank = 0; -}; - -struct AllreduceOptions { - ReduceOp reduceOp = ReduceOp::UNUSED; -}; - -struct ReduceOptions { - ReduceOp reduceOp = ReduceOp::UNUSED; - int64_t rootRank = 0; -}; - -class Backend : public torch::CustomClassHolder { - public: - c10::intrusive_ptr barrier() { - return c10::make_intrusive(); - } - - c10::intrusive_ptr send( - std::vector& tensors, - int dstRank, - int tag) { - return c10::make_intrusive(); - } - - c10::intrusive_ptr recv( - std::vector& tensors, - int srcRank, - int tag) { - return c10::make_intrusive(); - } - - c10::intrusive_ptr allgather( - std::vector>& outputTensors, - std::vector& inputTensors, - const AllgatherOptions& opts = AllgatherOptions()) { - return c10::make_intrusive(); - } - - c10::intrusive_ptr gather( - std::vector>& outputTensors, - std::vector& inputTensors, - const GatherOptions& opts = GatherOptions()) { - return c10::make_intrusive(); - } - - c10::intrusive_ptr reduce_scatter( - std::vector& outputTensors, - std::vector>& inputTensors, - const ReduceScatterOptions& opts = ReduceScatterOptions()) { - return c10::make_intrusive(); - } - c10::intrusive_ptr scatter( - std::vector& outputTensors, - std::vector>& inputTensors, - const ScatterOptions& opts = ScatterOptions()) { - return c10::make_intrusive(); - } - - c10::intrusive_ptr broadcast( - std::vector& tensors, - const BroadcastOptions& opts = BroadcastOptions()) { - return c10::make_intrusive(); - } - - c10::intrusive_ptr allreduce( - std::vector& tensors, - const AllreduceOptions& opts = AllreduceOptions()) { - return c10::make_intrusive(); - } - - c10::intrusive_ptr reduce( - std::vector& tensors, - const ReduceOptions& opts = ReduceOptions()) { - return c10::make_intrusive(); - } -}; - -class TCPStore : public torch::CustomClassHolder {}; - -} // namespace c10d diff --git a/csrc/multidevice/communication.cpp b/csrc/multidevice/communication.cpp index 0882e9af335..d512cd4dcb3 100644 --- a/csrc/multidevice/communication.cpp +++ b/csrc/multidevice/communication.cpp @@ -6,7 +6,7 @@ */ // clang-format on #include -#if defined(USE_DISTRIBUTED) && defined(USE_C10D_NCCL) +#if defined(USE_C10D_NCCL) #include #endif #include @@ -229,7 +229,7 @@ c10::intrusive_ptr Reduce::post( c10d::ReduceOptions options = { .reduceOp = params_.redOp, .rootRank = root_relative_index_}; auto team_backend = comm.getBackendForTeam(params_.team, backend); -#if defined(USE_DISTRIBUTED) && defined(USE_C10D_NCCL) +#if defined(USE_C10D_NCCL) auto nccl_backend = dynamic_cast(team_backend.get()); if (nccl_backend) { #if NVF_TORCH_VERSION_NO_LESS(2, 3, 0) diff --git a/csrc/multidevice/communication.h b/csrc/multidevice/communication.h index 77f7a4b6de4..762a35b97b0 100644 --- a/csrc/multidevice/communication.h +++ b/csrc/multidevice/communication.h @@ -9,11 +9,7 @@ #include #include -#ifdef USE_DISTRIBUTED #include 
-#else -#include -#endif #include #include diff --git a/csrc/multidevice/communicator.cpp b/csrc/multidevice/communicator.cpp index 67766ce85d4..e0162c8bc8e 100644 --- a/csrc/multidevice/communicator.cpp +++ b/csrc/multidevice/communicator.cpp @@ -10,7 +10,6 @@ #include #include -#ifdef USE_DISTRIBUTED #include #ifdef USE_C10D_GLOO #include @@ -21,7 +20,6 @@ #if defined(USE_C10D_UCC) && defined(NVFUSER_BUILD_WITH_UCC) #include #endif -#endif namespace nvfuser { @@ -132,7 +130,6 @@ inline std::string getTeamKey(const Team& team, CommunicatorBackend backend) { }); } -#ifdef USE_DISTRIBUTED // creates and return a process group backend c10::intrusive_ptr createBackend( CommunicatorBackend backend, @@ -164,7 +161,6 @@ c10::intrusive_ptr createBackend( #endif NVF_ERROR(false, "no distributed backend available"); } -#endif } // namespace Communicator::Communicator( @@ -187,7 +183,6 @@ Communicator::Communicator( return; } -#ifdef USE_DISTRIBUTED c10d::TCPStoreOptions store_opts; { char hostname[HOST_NAME_MAX]; // NOLINT (modernize-avoid-c-arrays) @@ -203,7 +198,6 @@ Communicator::Communicator( c10d::TCPStoreOptions::kDefaultPort; // 29500 store_opts.port = master_port_ ? master_port_ : comm_master_port_default; store_ = c10::make_intrusive(master_addr_, store_opts); -#endif #if defined(USE_C10D_UCC) && defined(NVFUSER_BUILD_WITH_UCC) ucc_available_ = true; @@ -222,7 +216,6 @@ c10::intrusive_ptr Communicator::getBackendForTeam( // check if backend associated with the team is present in the cache if (backends_.find(team_key) == backends_.end()) { // create the backend and cache it -#ifdef USE_DISTRIBUTED // check that the caller's rank belongs to the requested team auto rank_it = std::find(team.begin(), team.end(), deviceId()); NVF_ERROR( @@ -237,9 +230,6 @@ c10::intrusive_ptr Communicator::getBackendForTeam( c10::make_intrusive(team_key, store_), team_rank, static_cast(team.size())); -#else - backends_[team_key] = c10::make_intrusive(); -#endif } return backends_.at(team_key); } diff --git a/csrc/multidevice/communicator.h b/csrc/multidevice/communicator.h index 3a8fc465a01..33969f25129 100644 --- a/csrc/multidevice/communicator.h +++ b/csrc/multidevice/communicator.h @@ -10,32 +10,25 @@ #include #include #include - -#include -#include -#ifdef USE_DISTRIBUTED #include #include #include -#else -#include -#endif + +#include +#include #include namespace nvfuser { -/* - This file implements the class Communicator which sets up the inter-process - Backend. This class contains inter-process information, such as the rank, the - world size, as well as the Process Group that can be called to perform - inter-process communications. - - Each process is associated with a unique deviceId and device. The actual MPI - rank remains private to the class and should not be used by the user. The - communicator class holds privately the mappings ranks <-> device IDs <-> - device. - -*/ +// This file implements the class Communicator which sets up the inter-process +// Backend. This class contains inter-process information, such as the rank, the +// world size, as well as the Process Group that can be called to perform +// inter-process communications. +// +// Each process is associated with a unique deviceId and device. The actual MPI +// rank remains private to the class and should not be used by the user. The +// communicator class holds privately the mappings ranks <-> device IDs <-> +// device. 
using RankType = DeviceIdxType; diff --git a/csrc/multidevice/utils.cpp b/csrc/multidevice/utils.cpp index 12a0f4b9a7c..df315b2045f 100644 --- a/csrc/multidevice/utils.cpp +++ b/csrc/multidevice/utils.cpp @@ -20,14 +20,6 @@ namespace nvfuser { -NVF_API bool distributedEnabled() { -#ifdef USE_DISTRIBUTED - return true; -#else - return false; -#endif -} - namespace { std::unordered_set getShardedIterDomains(TensorView* tv) { diff --git a/csrc/multidevice/utils.h b/csrc/multidevice/utils.h index 7926c1873df..8f0e1a70e43 100644 --- a/csrc/multidevice/utils.h +++ b/csrc/multidevice/utils.h @@ -14,9 +14,6 @@ namespace nvfuser { -// Returns true iff nvFuser was compiled with distributed APIs enabled. -NVF_API bool distributedEnabled(); - // Returns whether a TensorView has a non-reduction axis parallelized Didx // Checks that the other non-reduction axis are not parallelized on Didx NVF_API bool isSharded(TensorView*); diff --git a/tests/cpp/test_multidevice_pipeline.cpp b/tests/cpp/test_multidevice_pipeline.cpp index 27d857bf6dc..85f7f74c257 100644 --- a/tests/cpp/test_multidevice_pipeline.cpp +++ b/tests/cpp/test_multidevice_pipeline.cpp @@ -43,10 +43,9 @@ using namespace torch::jit::fuser::cuda; using namespace at::indexing; // To run the following tests on several devices, pytorch must be installed with -// the flag USE_DISTRIBUTED=1 and nccl support. With that, nvFuser is built by -// default with NVFUSER_DISTRIBUTED defined. Then, on a node with at least 6 +// the flag USE_DISTRIBUTED=1 and nccl support. Then, on a node with at least 6 // GPUs, run the test using mpirun: `mpirun -np 6 build/test_multidevice -// --gtest_filter=PipelineTwoStages*`. +// --gtest_filter=PipelineTestTwoStages*`. TEST_F(PipelineTest, Pipeline) { const std::vector input_shape1 = {6, 7}; diff --git a/tests/cpp/test_resharding.cpp b/tests/cpp/test_resharding.cpp index 0ef340493b1..5a6f7422ba7 100644 --- a/tests/cpp/test_resharding.cpp +++ b/tests/cpp/test_resharding.cpp @@ -225,9 +225,6 @@ TEST_F(ReshardingTest, Detection) { } TEST_P(ReshardingTest, Insert) { - if (!distributedEnabled()) { // Test only works with distributed - GTEST_SKIP() << "Requires distributed API"; - } auto [mesh0, mesh1, From 048b30a6a3813831dbff1286d143b828520bdf68 Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Mon, 29 Apr 2024 17:42:39 +0000 Subject: [PATCH 3/6] Require USE_DISTRIBUTED to be on. --- setup.py | 7 ++++++- tools/gen_nvfuser_version.py | 17 +++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3521035db1a..ff4198d6924 100644 --- a/setup.py +++ b/setup.py @@ -282,7 +282,10 @@ def cmake(install_prefix: str = "./nvfuser"): if not os.path.exists(cmake_build_dir): os.makedirs(cmake_build_dir) - from tools.gen_nvfuser_version import get_pytorch_cmake_prefix + from tools.gen_nvfuser_version import ( + get_pytorch_cmake_prefix, + get_pytorch_use_distributed, + ) # this is used to suppress import error. # so we can get the right pytorch prefix for cmake @@ -296,6 +299,8 @@ def cmake(install_prefix: str = "./nvfuser"): logger.setLevel(logger_level) + assert get_pytorch_use_distributed(), "nvFuser requires PyTorch to be built with USE_DISTRIBUTED on." 
+ # generate cmake directory cmd_str = [ get_cmake_bin(), diff --git a/tools/gen_nvfuser_version.py b/tools/gen_nvfuser_version.py index 7537ff3ad4a..b115e4cb51a 100644 --- a/tools/gen_nvfuser_version.py +++ b/tools/gen_nvfuser_version.py @@ -45,6 +45,23 @@ def get_pytorch_cmake_prefix(): return stdout_msg.decode("utf-8").rstrip("\n") +def get_pytorch_use_distributed() -> bool: + from subprocess import Popen, PIPE + + # need to do this in a separate process so we are not going to delete nvfuser library while it's loaded by torch + process_torch_prefix = Popen( + [ + sys.executable, + "-c", + "import torch; print(torch._C._has_distributed())", + ], + stdout=PIPE, + ) + stdout_msg, _ = process_torch_prefix.communicate() + stdout_msg = stdout_msg.decode("utf-8").rstrip("\n") + return bool(stdout_msg) + + if __name__ == "__main__": version_file = nvfuser_root / "nvfuser" / "version.py" with open(version_file, "w") as f: From 3d4f96000f754081e474160c53d405cac93bdb36 Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Mon, 29 Apr 2024 17:44:26 +0000 Subject: [PATCH 4/6] lintrunner. --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ff4198d6924..e430c85ee66 100644 --- a/setup.py +++ b/setup.py @@ -299,7 +299,9 @@ def cmake(install_prefix: str = "./nvfuser"): logger.setLevel(logger_level) - assert get_pytorch_use_distributed(), "nvFuser requires PyTorch to be built with USE_DISTRIBUTED on." + assert ( + get_pytorch_use_distributed() + ), "nvFuser requires PyTorch to be built with USE_DISTRIBUTED on." # generate cmake directory cmd_str = [ From ef0d78c3ef4f2fe2b59a389aae5ba2fe3b57a5c2 Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Mon, 29 Apr 2024 13:49:04 -0700 Subject: [PATCH 5/6] Fix literal evaluation. --- tools/gen_nvfuser_version.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/tools/gen_nvfuser_version.py b/tools/gen_nvfuser_version.py index b115e4cb51a..c4072939d46 100644 --- a/tools/gen_nvfuser_version.py +++ b/tools/gen_nvfuser_version.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2023-present NVIDIA CORPORATION & AFFILIATES. # All rights reserved. 
# SPDX-License-Identifier: BSD-3-Clause +import ast import subprocess import sys from pathlib import Path @@ -30,36 +31,32 @@ def get_version() -> str: def get_pytorch_cmake_prefix(): - from subprocess import Popen, PIPE - # need to do this in a separate process so we are not going to delete nvfuser library while it's loaded by torch - process_torch_prefix = Popen( + process_torch_prefix = subprocess.Popen( [ sys.executable, "-c", "import torch.utils; print(torch.utils.cmake_prefix_path)", ], - stdout=PIPE, + stdout=subprocess.PIPE, ) stdout_msg, error_msg = process_torch_prefix.communicate() return stdout_msg.decode("utf-8").rstrip("\n") def get_pytorch_use_distributed() -> bool: - from subprocess import Popen, PIPE - # need to do this in a separate process so we are not going to delete nvfuser library while it's loaded by torch - process_torch_prefix = Popen( + process_torch_prefix = subprocess.Popen( [ sys.executable, "-c", "import torch; print(torch._C._has_distributed())", ], - stdout=PIPE, + stdout=subprocess.PIPE, ) stdout_msg, _ = process_torch_prefix.communicate() stdout_msg = stdout_msg.decode("utf-8").rstrip("\n") - return bool(stdout_msg) + return ast.literal_eval(stdout_msg) if __name__ == "__main__": From 68704b79e8c87f36dc1a36a42c81f58d6320aeec Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Mon, 29 Apr 2024 14:47:25 -0700 Subject: [PATCH 6/6] RuntimeError instead of assert. --- setup.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index e430c85ee66..612914e5cc6 100644 --- a/setup.py +++ b/setup.py @@ -299,9 +299,10 @@ def cmake(install_prefix: str = "./nvfuser"): logger.setLevel(logger_level) - assert ( - get_pytorch_use_distributed() - ), "nvFuser requires PyTorch to be built with USE_DISTRIBUTED on." + if not get_pytorch_use_distributed(): + raise RuntimeError( + "nvFuser requires PyTorch to be built with USE_DISTRIBUTED on." + ) # generate cmake directory cmd_str = [
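
Note on PATCH 5 and PATCH 6 (illustrative, not part of the patches themselves): the
helper spawns a child Python process that prints the result of
torch._C._has_distributed(), so the parent reads it back as text -- "True" or
"False" for a boolean result. Calling bool() on that text returns True in both
cases, since any non-empty string is truthy, which is the bug PATCH 5 fixes by
parsing the text with ast.literal_eval() instead. PATCH 6 then swaps the assert
for an explicit RuntimeError, so the check still fires when Python runs with -O,
which strips assert statements. A minimal standalone sketch of the difference
(plain Python, assuming the captured output is the string "False"):

    import ast

    captured = "False"                  # what the parent would read from the child's stdout
    print(bool(captured))               # True  -- non-empty strings are always truthy
    print(ast.literal_eval(captured))   # False -- parsed back into the Python literal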