From a5e759b91cf8edca3344729998e6fad352707963 Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Wed, 10 Apr 2024 16:31:08 -0700 Subject: [PATCH] Revert "NVFUSER_DISTRIBUTED instead of USE_DISTRIBUTED (#1711)" This reverts commit 9ae6c767212a12e7b28be5627744afd5965296d6. --- CMakeLists.txt | 8 -------- csrc/multidevice/communication.cpp | 2 +- csrc/multidevice/communication.h | 2 +- csrc/multidevice/communicator.cpp | 2 +- csrc/multidevice/communicator.h | 2 +- csrc/multidevice/executor.cpp | 2 +- csrc/multidevice/executor.h | 2 +- csrc/multidevice/lower_communication.cpp | 4 ++-- csrc/multidevice/lower_communication.h | 4 ++-- csrc/multidevice/utils.cpp | 2 +- setup.py | 17 +---------------- tests/cpp/multidevice.cpp | 2 +- tests/cpp/multidevice.h | 2 +- tests/cpp/test_multidevice_communications.cpp | 2 +- tests/cpp/test_multidevice_pipeline.cpp | 2 +- tests/cpp/test_multidevice_sharding.cpp | 2 +- tools/gen_nvfuser_version.py | 16 ---------------- 17 files changed, 17 insertions(+), 56 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 86957a90cd3..f2d2b8427bb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,13 +15,6 @@ set(NVFUSER_THIRD_PARTY_DIR "${NVFUSER_ROOT}/third_party") option(NVFUSER_STANDALONE_BUILD_WITH_UCC "" OFF) option(NVFUSER_BUILD_WITH_ASAN "Build nvFuser with asan" OFF) -include(CMakeDependentOption) -cmake_dependent_option(NVFUSER_DISTRIBUTED "" ON "USE_DISTRIBUTED" OFF) -if (NVFUSER_DISTRIBUTED) - add_compile_definitions(NVFUSER_DISTRIBUTED) -endif() -message(STATUS "Setting NVFUSER_DISTRIBUTED=${NVFUSER_DISTRIBUTED}") - # We try to update which C++ standard we use together in lockstep across all # built libraries, and these variables control which that is. Generally we are # on C++20, but we still support a version of CUDA (11) that does not recognize @@ -765,7 +758,6 @@ message(STATUS "******** Nvfuser configuration summary ********") message(STATUS " UCC_FOUND: ${UCC_FOUND}") message(STATUS " NVFUSER_STANDALONE_BUILD_WITH_UCC : ${NVFUSER_STANDALONE_BUILD_WITH_UCC}") message(STATUS " NVFUSER_BUILD_WITH_ASAN : ${NVFUSER_BUILD_WITH_ASAN}") -message(STATUS " NVFUSER_DISTRIBUTED : ${NVFUSER_DISTRIBUTED}") message(STATUS " NVFUSER_CPP_STANDARD : ${NVFUSER_CPP_STANDARD}") if(NVFUSER_STANDALONE_BUILD_WITH_UCC) diff --git a/csrc/multidevice/communication.cpp b/csrc/multidevice/communication.cpp index da99ce23d88..c8b87793ced 100644 --- a/csrc/multidevice/communication.cpp +++ b/csrc/multidevice/communication.cpp @@ -5,7 +5,7 @@ * SPDX-License-Identifier: BSD-3-Clause */ // clang-format on -#ifdef NVFUSER_DISTRIBUTED +#ifdef USE_DISTRIBUTED #ifdef USE_C10D_NCCL #include #endif diff --git a/csrc/multidevice/communication.h b/csrc/multidevice/communication.h index 6c18a95f054..51c96c042b8 100644 --- a/csrc/multidevice/communication.h +++ b/csrc/multidevice/communication.h @@ -6,7 +6,7 @@ */ // clang-format on #pragma once -#ifdef NVFUSER_DISTRIBUTED +#ifdef USE_DISTRIBUTED #include #include diff --git a/csrc/multidevice/communicator.cpp b/csrc/multidevice/communicator.cpp index 8ee90bebbde..d387dd2c6ab 100644 --- a/csrc/multidevice/communicator.cpp +++ b/csrc/multidevice/communicator.cpp @@ -5,7 +5,7 @@ * SPDX-License-Identifier: BSD-3-Clause */ // clang-format on -#ifdef NVFUSER_DISTRIBUTED +#ifdef USE_DISTRIBUTED #include #include diff --git a/csrc/multidevice/communicator.h b/csrc/multidevice/communicator.h index 2cb96b380f4..d2445f6a45f 100644 --- a/csrc/multidevice/communicator.h +++ b/csrc/multidevice/communicator.h @@ -6,7 +6,7 @@ */ // clang-format on #pragma once -#ifdef NVFUSER_DISTRIBUTED +#ifdef USE_DISTRIBUTED #include #include diff --git a/csrc/multidevice/executor.cpp b/csrc/multidevice/executor.cpp index 44623c37671..a6b8559b7e1 100644 --- a/csrc/multidevice/executor.cpp +++ b/csrc/multidevice/executor.cpp @@ -5,7 +5,7 @@ * SPDX-License-Identifier: BSD-3-Clause */ // clang-format on -#ifdef NVFUSER_DISTRIBUTED +#ifdef USE_DISTRIBUTED #include #include #include diff --git a/csrc/multidevice/executor.h b/csrc/multidevice/executor.h index afb782b846d..c701a18b858 100644 --- a/csrc/multidevice/executor.h +++ b/csrc/multidevice/executor.h @@ -5,7 +5,7 @@ * SPDX-License-Identifier: BSD-3-Clause */ // clang-format on -#ifdef NVFUSER_DISTRIBUTED +#ifdef USE_DISTRIBUTED #pragma once #include diff --git a/csrc/multidevice/lower_communication.cpp b/csrc/multidevice/lower_communication.cpp index 386ef48d7d8..dcfbc592071 100644 --- a/csrc/multidevice/lower_communication.cpp +++ b/csrc/multidevice/lower_communication.cpp @@ -5,7 +5,7 @@ * SPDX-License-Identifier: BSD-3-Clause */ // clang-format on -#ifdef NVFUSER_DISTRIBUTED +#ifdef USE_DISTRIBUTED #include #include #include @@ -581,7 +581,7 @@ bool isLowerableToCommunication(Expr* expr) { } // namespace nvfuser -#else // NVFUSER_DISTRIBUTED +#else // USE_DISTRIBUTED #include diff --git a/csrc/multidevice/lower_communication.h b/csrc/multidevice/lower_communication.h index 125b7c3f76a..b4b973ca59f 100644 --- a/csrc/multidevice/lower_communication.h +++ b/csrc/multidevice/lower_communication.h @@ -5,7 +5,7 @@ * SPDX-License-Identifier: BSD-3-Clause */ // clang-format on -#ifdef NVFUSER_DISTRIBUTED +#ifdef USE_DISTRIBUTED #pragma once #include @@ -27,7 +27,7 @@ std::vector> lowerCommunication( at::Tensor output_tensor); } // namespace nvfuser -#else // NVFUSER_DISTRIBUTED +#else namespace nvfuser { diff --git a/csrc/multidevice/utils.cpp b/csrc/multidevice/utils.cpp index de223f0c035..a9bc97689ce 100644 --- a/csrc/multidevice/utils.cpp +++ b/csrc/multidevice/utils.cpp @@ -21,7 +21,7 @@ namespace nvfuser { NVF_API bool distributedEnabled() { -#ifdef NVFUSER_DISTRIBUTED +#ifdef USE_DISTRIBUTED return true; #else return false; diff --git a/setup.py b/setup.py index f589de9810f..c4179b6afe9 100644 --- a/setup.py +++ b/setup.py @@ -23,9 +23,6 @@ # --build-with-ucc # Build nvfuser with UCC support. You may need to specify environment variables of UCC_HOME, UCC_DIR, UCX_HOME, UCX_DIR. # -# --build-without-distributed -# Build nvfuser without multidevice support -# # --debug # Building nvfuser in debug mode # @@ -71,7 +68,6 @@ NO_NINJA = False BUILD_WITH_UCC = False BUILD_WITH_ASAN = False -BUILD_WITHOUT_DISTRIBUTED = False OVERWRITE_VERSION = False VERSION_TAG = None BUILD_TYPE = "Release" @@ -103,9 +99,6 @@ if arg == "--build-with-asan": BUILD_WITH_ASAN = True continue - if arg == "--build-without-distributed": - BUILD_WITHOUT_DISTRIBUTED = True - continue if arg == "--debug": BUILD_TYPE = "Debug" continue @@ -286,10 +279,7 @@ def cmake(install_prefix: str = "./nvfuser"): if not os.path.exists(cmake_build_dir): os.makedirs(cmake_build_dir) - from tools.gen_nvfuser_version import ( - get_pytorch_cmake_prefix, - get_pytorch_use_distributed, - ) + from tools.gen_nvfuser_version import get_pytorch_cmake_prefix # this is used to suppress import error. # so we can get the right pytorch prefix for cmake @@ -303,8 +293,6 @@ def cmake(install_prefix: str = "./nvfuser"): logger.setLevel(logger_level) - pytorch_use_distributed = get_pytorch_use_distributed() - # generate cmake directory cmd_str = [ get_cmake_bin(), @@ -312,7 +300,6 @@ def cmake(install_prefix: str = "./nvfuser"): "-DCMAKE_BUILD_TYPE=" + BUILD_TYPE, f"-DCMAKE_INSTALL_PREFIX={install_prefix}", f"-DNVFUSER_CPP_STANDARD={CPP_STANDARD}", - f"-DUSE_DISTRIBUTED={pytorch_use_distributed}", "-B", cmake_build_dir, ] @@ -330,8 +317,6 @@ def cmake(install_prefix: str = "./nvfuser"): cmd_str.append("-DBUILD_NVFUSER_BENCHMARK=ON") if BUILD_WITH_ASAN: cmd_str.append("-DNVFUSER_BUILD_WITH_ASAN=ON") - if BUILD_WITHOUT_DISTRIBUTED: - cmd_str.append("-DNVFUSER_DISTRIBUTED=OFF") cmd_str.append(".") print(f"Configuring CMake with {' '.join(cmd_str)}") diff --git a/tests/cpp/multidevice.cpp b/tests/cpp/multidevice.cpp index 2de09b30fad..26b41d705a2 100644 --- a/tests/cpp/multidevice.cpp +++ b/tests/cpp/multidevice.cpp @@ -5,7 +5,7 @@ * SPDX-License-Identifier: BSD-3-Clause */ // clang-format on -#ifdef NVFUSER_DISTRIBUTED +#ifdef USE_DISTRIBUTED #include #include #include diff --git a/tests/cpp/multidevice.h b/tests/cpp/multidevice.h index e9b5b6f9a54..093db4471f5 100644 --- a/tests/cpp/multidevice.h +++ b/tests/cpp/multidevice.h @@ -5,7 +5,7 @@ * SPDX-License-Identifier: BSD-3-Clause */ // clang-format on -#ifdef NVFUSER_DISTRIBUTED +#ifdef USE_DISTRIBUTED #pragma once #include diff --git a/tests/cpp/test_multidevice_communications.cpp b/tests/cpp/test_multidevice_communications.cpp index 62007815aa2..8ef17e8f8e3 100644 --- a/tests/cpp/test_multidevice_communications.cpp +++ b/tests/cpp/test_multidevice_communications.cpp @@ -5,7 +5,7 @@ * SPDX-License-Identifier: BSD-3-Clause */ // clang-format on -#ifdef NVFUSER_DISTRIBUTED +#ifdef USE_DISTRIBUTED #include #include diff --git a/tests/cpp/test_multidevice_pipeline.cpp b/tests/cpp/test_multidevice_pipeline.cpp index 1c231191a7a..68ad1a0bea5 100644 --- a/tests/cpp/test_multidevice_pipeline.cpp +++ b/tests/cpp/test_multidevice_pipeline.cpp @@ -5,7 +5,7 @@ * SPDX-License-Identifier: BSD-3-Clause */ // clang-format on -#ifdef NVFUSER_DISTRIBUTED +#ifdef USE_DISTRIBUTED #include #include diff --git a/tests/cpp/test_multidevice_sharding.cpp b/tests/cpp/test_multidevice_sharding.cpp index 6a62c6f4366..b7108ba338c 100644 --- a/tests/cpp/test_multidevice_sharding.cpp +++ b/tests/cpp/test_multidevice_sharding.cpp @@ -5,7 +5,7 @@ * SPDX-License-Identifier: BSD-3-Clause */ // clang-format on -#ifdef NVFUSER_DISTRIBUTED +#ifdef USE_DISTRIBUTED #include #include #include diff --git a/tools/gen_nvfuser_version.py b/tools/gen_nvfuser_version.py index 789aa96d37a..7537ff3ad4a 100644 --- a/tools/gen_nvfuser_version.py +++ b/tools/gen_nvfuser_version.py @@ -45,22 +45,6 @@ def get_pytorch_cmake_prefix(): return stdout_msg.decode("utf-8").rstrip("\n") -def get_pytorch_use_distributed(): - from subprocess import Popen, PIPE - - # need to do this in a separate process so we are not going to delete nvfuser library while it's loaded by torch - process_torch_prefix = Popen( - [ - sys.executable, - "-c", - "import torch; print(torch._C._has_distributed())", - ], - stdout=PIPE, - ) - stdout_msg, error_msg = process_torch_prefix.communicate() - return stdout_msg.decode("utf-8").rstrip("\n") - - if __name__ == "__main__": version_file = nvfuser_root / "nvfuser" / "version.py" with open(version_file, "w") as f: