Skip to content

Commit

Permalink
Revert "NVFUSER_DISTRIBUTED instead of USE_DISTRIBUTED (#1711)"
Browse files (browse the repository at this point in the history)
This reverts commit 9ae6c76.
  • Loading branch information
wujingyue committed Apr 10, 2024
1 parent 618c4b7 commit a5e759b
Show file tree
Hide file tree
Showing 17 changed files with 17 additions and 56 deletions.
8 changes: 0 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,6 @@ set(NVFUSER_THIRD_PARTY_DIR "${NVFUSER_ROOT}/third_party")
option(NVFUSER_STANDALONE_BUILD_WITH_UCC "" OFF)
option(NVFUSER_BUILD_WITH_ASAN "Build nvFuser with asan" OFF)

# NVFUSER_DISTRIBUTED controls whether nvFuser is built with its multidevice
# (distributed) code. It is a dependent option: it is user-settable, with a
# default of ON, only while USE_DISTRIBUTED evaluates to true; otherwise it is
# forced to OFF.
include(CMakeDependentOption)
cmake_dependent_option(
  NVFUSER_DISTRIBUTED
  "Build nvFuser with distributed (multidevice) support"
  ON
  "USE_DISTRIBUTED"
  OFF
)
if(NVFUSER_DISTRIBUTED)
  # Directory-scoped definition: the C++ sources test this macro with
  # `#ifdef NVFUSER_DISTRIBUTED`, so it is added for the targets of this
  # directory rather than for one specific target.
  add_compile_definitions(NVFUSER_DISTRIBUTED)
endif()
message(STATUS "Setting NVFUSER_DISTRIBUTED=${NVFUSER_DISTRIBUTED}")

# We try to update which C++ standard we use together in lockstep across all
# built libraries, and these variables control which that is. Generally we are
# on C++20, but we still support a version of CUDA (11) that does not recognize
Expand Down Expand Up @@ -765,7 +758,6 @@ message(STATUS "******** Nvfuser configuration summary ********")
message(STATUS " UCC_FOUND: ${UCC_FOUND}")
message(STATUS " NVFUSER_STANDALONE_BUILD_WITH_UCC : ${NVFUSER_STANDALONE_BUILD_WITH_UCC}")
message(STATUS " NVFUSER_BUILD_WITH_ASAN : ${NVFUSER_BUILD_WITH_ASAN}")
message(STATUS " NVFUSER_DISTRIBUTED : ${NVFUSER_DISTRIBUTED}")
message(STATUS " NVFUSER_CPP_STANDARD : ${NVFUSER_CPP_STANDARD}")

if(NVFUSER_STANDALONE_BUILD_WITH_UCC)
Expand Down
2 changes: 1 addition & 1 deletion csrc/multidevice/communication.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*/
// clang-format on
#ifdef NVFUSER_DISTRIBUTED
#ifdef USE_DISTRIBUTED
#ifdef USE_C10D_NCCL
#include <torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp>
#endif
Expand Down
2 changes: 1 addition & 1 deletion csrc/multidevice/communication.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
*/
// clang-format on
#pragma once
#ifdef NVFUSER_DISTRIBUTED
#ifdef USE_DISTRIBUTED

#include <multidevice/communicator.h>
#include <multidevice/multidevice.h>
Expand Down
2 changes: 1 addition & 1 deletion csrc/multidevice/communicator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*/
// clang-format on
#ifdef NVFUSER_DISTRIBUTED
#ifdef USE_DISTRIBUTED
#include <netdb.h>

#include <multidevice/communicator.h>
Expand Down
2 changes: 1 addition & 1 deletion csrc/multidevice/communicator.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
*/
// clang-format on
#pragma once
#ifdef NVFUSER_DISTRIBUTED
#ifdef USE_DISTRIBUTED

#include <exceptions.h>
#include <multidevice/multidevice.h>
Expand Down
2 changes: 1 addition & 1 deletion csrc/multidevice/executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*/
// clang-format on
#ifdef NVFUSER_DISTRIBUTED
#ifdef USE_DISTRIBUTED
#include <ATen/cuda/CUDAContext.h>
#include <device_lower/utils.h>
#include <fusion_segmenter.h>
Expand Down
2 changes: 1 addition & 1 deletion csrc/multidevice/executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*/
// clang-format on
#ifdef NVFUSER_DISTRIBUTED
#ifdef USE_DISTRIBUTED
#pragma once

#include <c10/core/DeviceType.h>
Expand Down
4 changes: 2 additions & 2 deletions csrc/multidevice/lower_communication.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*/
// clang-format on
#ifdef NVFUSER_DISTRIBUTED
#ifdef USE_DISTRIBUTED
#include <device_lower/utils.h>
#include <ir/interface_nodes.h>
#include <multidevice/device_mesh.h>
Expand Down Expand Up @@ -581,7 +581,7 @@ bool isLowerableToCommunication(Expr* expr) {

} // namespace nvfuser

#else // NVFUSER_DISTRIBUTED
#else // USE_DISTRIBUTED

#include <ir/base_nodes.h>

Expand Down
4 changes: 2 additions & 2 deletions csrc/multidevice/lower_communication.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*/
// clang-format on
#ifdef NVFUSER_DISTRIBUTED
#ifdef USE_DISTRIBUTED
#pragma once

#include <ir/base_nodes.h>
Expand All @@ -27,7 +27,7 @@ std::vector<std::shared_ptr<Communication>> lowerCommunication(
at::Tensor output_tensor);
} // namespace nvfuser

#else // NVFUSER_DISTRIBUTED
#else

namespace nvfuser {

Expand Down
2 changes: 1 addition & 1 deletion csrc/multidevice/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
namespace nvfuser {

NVF_API bool distributedEnabled() {
#ifdef NVFUSER_DISTRIBUTED
#ifdef USE_DISTRIBUTED
return true;
#else
return false;
Expand Down
17 changes: 1 addition & 16 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@
# --build-with-ucc
# Build nvfuser with UCC support. You may need to specify environment variables of UCC_HOME, UCC_DIR, UCX_HOME, UCX_DIR.
#
# --build-without-distributed
# Build nvfuser without multidevice support
#
# --debug
# Building nvfuser in debug mode
#
Expand Down Expand Up @@ -71,7 +68,6 @@
NO_NINJA = False
BUILD_WITH_UCC = False
BUILD_WITH_ASAN = False
BUILD_WITHOUT_DISTRIBUTED = False
OVERWRITE_VERSION = False
VERSION_TAG = None
BUILD_TYPE = "Release"
Expand Down Expand Up @@ -103,9 +99,6 @@
if arg == "--build-with-asan":
BUILD_WITH_ASAN = True
continue
if arg == "--build-without-distributed":
BUILD_WITHOUT_DISTRIBUTED = True
continue
if arg == "--debug":
BUILD_TYPE = "Debug"
continue
Expand Down Expand Up @@ -286,10 +279,7 @@ def cmake(install_prefix: str = "./nvfuser"):
if not os.path.exists(cmake_build_dir):
os.makedirs(cmake_build_dir)

from tools.gen_nvfuser_version import (
get_pytorch_cmake_prefix,
get_pytorch_use_distributed,
)
from tools.gen_nvfuser_version import get_pytorch_cmake_prefix

# this is used to suppress import error.
# so we can get the right pytorch prefix for cmake
Expand All @@ -303,16 +293,13 @@ def cmake(install_prefix: str = "./nvfuser"):

logger.setLevel(logger_level)

pytorch_use_distributed = get_pytorch_use_distributed()

# generate cmake directory
cmd_str = [
get_cmake_bin(),
pytorch_cmake_config,
"-DCMAKE_BUILD_TYPE=" + BUILD_TYPE,
f"-DCMAKE_INSTALL_PREFIX={install_prefix}",
f"-DNVFUSER_CPP_STANDARD={CPP_STANDARD}",
f"-DUSE_DISTRIBUTED={pytorch_use_distributed}",
"-B",
cmake_build_dir,
]
Expand All @@ -330,8 +317,6 @@ def cmake(install_prefix: str = "./nvfuser"):
cmd_str.append("-DBUILD_NVFUSER_BENCHMARK=ON")
if BUILD_WITH_ASAN:
cmd_str.append("-DNVFUSER_BUILD_WITH_ASAN=ON")
if BUILD_WITHOUT_DISTRIBUTED:
cmd_str.append("-DNVFUSER_DISTRIBUTED=OFF")
cmd_str.append(".")

print(f"Configuring CMake with {' '.join(cmd_str)}")
Expand Down
2 changes: 1 addition & 1 deletion tests/cpp/multidevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*/
// clang-format on
#ifdef NVFUSER_DISTRIBUTED
#ifdef USE_DISTRIBUTED
#include <fusion_segmenter.h>
#include <ir/all_nodes.h>
#include <multidevice/utils.h>
Expand Down
2 changes: 1 addition & 1 deletion tests/cpp/multidevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*/
// clang-format on
#ifdef NVFUSER_DISTRIBUTED
#ifdef USE_DISTRIBUTED
#pragma once

#include <multidevice/communication.h>
Expand Down
2 changes: 1 addition & 1 deletion tests/cpp/test_multidevice_communications.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*/
// clang-format on
#ifdef NVFUSER_DISTRIBUTED
#ifdef USE_DISTRIBUTED
#include <gtest/gtest.h>

#include <multidevice/communication.h>
Expand Down
2 changes: 1 addition & 1 deletion tests/cpp/test_multidevice_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*/
// clang-format on
#ifdef NVFUSER_DISTRIBUTED
#ifdef USE_DISTRIBUTED
#include <gtest/gtest.h>

#include <codegen.h>
Expand Down
2 changes: 1 addition & 1 deletion tests/cpp/test_multidevice_sharding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*/
// clang-format on
#ifdef NVFUSER_DISTRIBUTED
#ifdef USE_DISTRIBUTED
#include <disjoint_set.h>
#include <fusion.h>
#include <fusion_segmenter.h>
Expand Down
16 changes: 0 additions & 16 deletions tools/gen_nvfuser_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,22 +45,6 @@ def get_pytorch_cmake_prefix():
return stdout_msg.decode("utf-8").rstrip("\n")


def get_pytorch_use_distributed():
    """Query ``torch._C._has_distributed()`` from a separate interpreter.

    Returns:
        The text the child interpreter printed on stdout, decoded as UTF-8
        with trailing newline characters removed.
    """
    from subprocess import PIPE, run

    # Ask torch from a child process instead of importing it here, so that the
    # nvfuser library is not deleted while torch has it loaded in this process.
    probe = run(
        [sys.executable, "-c", "import torch; print(torch._C._has_distributed())"],
        stdout=PIPE,
    )
    return probe.stdout.decode("utf-8").rstrip("\n")


if __name__ == "__main__":
version_file = nvfuser_root / "nvfuser" / "version.py"
with open(version_file, "w") as f:
Expand Down

0 comments on commit a5e759b

Please sign in to comment.