Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[fbgemm_gpu] Modularize CMake Build [3/N] #3408

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 46 additions & 17 deletions cmake/modules/GpuCppLibrary.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/modules/Utilities.cmake)

function(prepare_target_sources)
# This function does the following:
#
# 1. Take all the specified project sources for a target
# 1. Filter files out based on CPU-only, CUDA, and HIP build modes
# 1. Bucketize them into sets of CXX, CU, and HIP files
Expand Down Expand Up @@ -134,14 +135,20 @@ endfunction()

function(gpu_cpp_library)
# This function does the following:
#
# 1. Take all the target sources and select relevant sources based on build type (CPU-only, CUDA, HIP)
# 1. Apply source file properties as needed
# 1. HIPify files as needed
# 1. Build the .SO file
# 1. Fetch the HIPified versions of the files as needed (presumes that `hipify()` has already been run)
# 1. Build the .SO file, either as STATIC or MODULE
#
# Building as STATIC allows the target to be linked to other library targets:
# https://www.reddit.com/r/cpp_questions/comments/120p0ey/how_to_create_a_composite_shared_library_out_of
# https://github.com/ROCm/hipDNN/blob/master/Examples/hipdnn-training/cmake/FindHIP.cmake

set(flags)
set(singleValueArgs
PREFIX # Desired name prefix for the library target
PREFIX # Desired name for the library target (and by extension, the prefix for naming intermediate targets)
TYPE # Target type, e.g., MODULE, OBJECT. See https://cmake.org/cmake/help/latest/command/add_library.html
)
set(multiValueArgs
CPU_SRCS # Sources for CPU-only build
Expand All @@ -151,6 +158,7 @@ function(gpu_cpp_library)
OTHER_SRCS # Sources from third-party libraries
GPU_FLAGS # Compile flags for GPU builds
INCLUDE_DIRS # Include directories for compilation
DEPS # Target dependencies, i.e. built STATIC targets
)

cmake_parse_arguments(
Expand All @@ -162,6 +170,8 @@ function(gpu_cpp_library)
# Prepare CXX and CU sources
############################################################################

# Take all the sources, and filter them into CPU and GPU buckets depending
# on the source type and build mode
prepare_target_sources(
PREFIX ${args_PREFIX}
CPU_SRCS ${args_CPU_SRCS}
Expand All @@ -172,15 +182,25 @@ function(gpu_cpp_library)
INCLUDE_DIRS ${args_INCLUDE_DIRS})
set(lib_sources ${${args_PREFIX}_sources})

############################################################################
# Prepare Target Deps
############################################################################

# Convert target dependency references into `$<TARGET_OBJECTS:...>` generator expressions
# See https://cmake.org/cmake/help/latest/manual/cmake-generator-expressions.7.html#id34
set(target_deps)
foreach(dep ${args_DEPS})
list(APPEND target_deps "$<TARGET_OBJECTS:${dep}>")
endforeach()

############################################################################
# Build the Library
############################################################################

set(lib_name ${args_PREFIX}_py)
set(lib_name ${args_PREFIX})
if(USE_ROCM)
# Fetch the equivalent HIPified sources if available.
# This presumes that hipify() has already been run.
# This presumes that `hipify()` has already been run.
get_hipified_list("${lib_sources}" lib_sources_hipified)

# Set properties for the HIPified sources
Expand All @@ -191,9 +211,10 @@ function(gpu_cpp_library)
hip_include_directories("${args_INCLUDE_DIRS}")

# Create the HIP library
hip_add_library(${lib_name} SHARED
hip_add_library(${lib_name} ${args_TYPE}
${lib_sources_hipified}
${args_OTHER_SRCS}
${target_deps}
${FBGEMM_HIP_HCC_LIBRARIES}
HIPCC_OPTIONS
${HIP_HCC_FLAGS})
Expand All @@ -206,10 +227,11 @@ function(gpu_cpp_library)
${args_INCLUDE_DIRS})

else()
# Create the C++/CUDA library
add_library(${lib_name} MODULE
# Create the CPU-only / CUDA library
add_library(${lib_name} ${args_TYPE}
${lib_sources}
${args_OTHER_SRCS})
${args_OTHER_SRCS}
${target_deps})
endif()

############################################################################
Expand All @@ -221,9 +243,14 @@ function(gpu_cpp_library)
${TORCH_INCLUDE_DIRS}
${NCCL_INCLUDE_DIRS})

# Remove `lib` from the output artifact name, i.e. `libfoo.so` -> `foo.so`
set_target_properties(${lib_name}
PROPERTIES PREFIX "")
# Set additional target properties
set_target_properties(${lib_name} PROPERTIES
# Remove `lib` prefix from the output artifact name, e.g. `libfoo.so` -> `foo.so`
PREFIX ""
# Enforce -fPIC even when the target is built as a STATIC library, since such
# libraries are to be integrated into other libraries down the line
# https://stackoverflow.com/questions/3961446/why-does-gcc-not-implicitly-supply-the-fpic-flag-when-compiling-static-librarie
POSITION_INDEPENDENT_CODE ON)

# Link to PyTorch
target_link_libraries(${lib_name}
Expand All @@ -236,7 +263,7 @@ function(gpu_cpp_library)
target_link_libraries(${lib_name} ${NVML_LIB_PATH})
endif()

# Silence warnings (in asmjit)
# Silence compiler warnings (in asmjit)
target_compile_options(${lib_name} PRIVATE
-Wno-deprecated-anon-enum-enum-conversion
-Wno-deprecated-declarations)
Expand All @@ -251,18 +278,17 @@ function(gpu_cpp_library)
WORKING_DIRECTORY ${OUTPUT_DIR}
COMMAND bash ${FBGEMM}/.github/scripts/fbgemm_gpu_postbuild.bash)

# Run the post-build steps AFTER the build itself
# Set the post-build steps to run AFTER the build completes
add_dependencies(${lib_name}_postbuild ${lib_name})

############################################################################
# Set the Output Variable(s)
############################################################################

# PREFIX = `foo` --> Target Library = `foo_py`
set(${args_PREFIX}_py ${lib_name} PARENT_SCOPE)
set(${args_PREFIX} ${lib_name} PARENT_SCOPE)

BLOCK_PRINT(
"GPU CPP Library Target: ${args_PREFIX}"
"GPU CPP Library Target: ${args_PREFIX} (${args_TYPE})"
" "
"CPU_SRCS:"
"${args_CPU_SRCS}"
Expand Down Expand Up @@ -291,6 +317,9 @@ function(gpu_cpp_library)
"HIPified Source Files:"
"${lib_sources_hipified}"
" "
"Target Dependencies:"
"${target_deps}"
" "
"Output Library:"
"${lib_name}"
)
Expand Down
5 changes: 4 additions & 1 deletion fbgemm_gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,10 @@ if(USE_ROCM)
${CMAKE_CURRENT_SOURCE_DIR}/experimental/gen_ai)

# HIPify all .CU and .CUH sources under the current directory (`/fbgemm_gpu`)
# .H sources are not automatically HIPified, so they need #ifdef USE_ROCM guards
#
# Note that .H sources are not automatically HIPified, so if they reference
# CUDA-specific code, e.g. `#include <c10/cuda/CUDAStream.h>`, they will need
# to be updated with `#ifdef USE_ROCM` guards.
hipify(
CUDA_SOURCE_DIR
${PROJECT_SOURCE_DIR}
Expand Down
89 changes: 24 additions & 65 deletions fbgemm_gpu/FbgemmGpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -308,53 +308,6 @@ list(APPEND gen_defused_optim_py_files
${CMAKE_BINARY_DIR}/optimizer_args.py)


################################################################################
# FBGEMM_GPU Generated Sources
################################################################################

if(CXX_AVX2_FOUND)
set_source_files_properties(${gen_cpu_source_files}
PROPERTIES COMPILE_OPTIONS "${AVX2_FLAGS}")
else()
set_source_files_properties(${gen_cpu_source_files}
PROPERTIES COMPILE_OPTIONS "-fopenmp")
endif()

set_source_files_properties(${gen_cpu_source_files}
PROPERTIES INCLUDE_DIRECTORIES
"${fbgemm_sources_include_directories}")

set_source_files_properties(${gen_gpu_host_source_files}
PROPERTIES INCLUDE_DIRECTORIES
"${fbgemm_sources_include_directories}")

set_source_files_properties(${gen_gpu_kernel_source_files}
PROPERTIES INCLUDE_DIRECTORIES
"${fbgemm_sources_include_directories}")

set_source_files_properties(${gen_gpu_kernel_source_files}
PROPERTIES COMPILE_OPTIONS
"${TORCH_CUDA_OPTIONS}")

set_source_files_properties(${gen_defused_optim_source_files}
PROPERTIES INCLUDE_DIRECTORIES
"${fbgemm_sources_include_directories}")

if(NOT FBGEMM_CPU_ONLY)
set(fbgemm_gpu_sources_gen
${gen_gpu_kernel_source_files}
${gen_gpu_host_source_files}
${gen_cpu_source_files}
${gen_defused_optim_source_files})
else()
set(fbgemm_gpu_sources_gen
${gen_cpu_source_files}
# To force generate_embedding_optimizer to generate Python files
${gen_defused_optim_py_files}
)
endif()


################################################################################
# FBGEMM (not FBGEMM_GPU) Sources
################################################################################
Expand Down Expand Up @@ -437,7 +390,7 @@ set(fbgemm_gpu_sources_cpu_static
src/sparse_ops/sparse_async_cumsum.cpp
src/sparse_ops/sparse_ops_cpu.cpp
src/sparse_ops/sparse_ops_meta.cpp
src/embedding_inplace_ops/embedding_inplace_update_cpu.cpp
# src/embedding_inplace_ops/embedding_inplace_update_cpu.cpp
src/split_embeddings_cache/linearize_cache_indices.cpp
src/split_embeddings_cache/lfu_cache_populate_byte.cpp
src/split_embeddings_cache/lru_cache_populate_byte.cpp
Expand All @@ -459,7 +412,7 @@ if(NOT FBGEMM_CPU_ONLY)
src/sparse_ops/sparse_ops_gpu.cpp
src/split_embeddings_utils/split_embeddings_utils.cpp
src/metric_ops/metric_ops_host.cpp
src/embedding_inplace_ops/embedding_inplace_update_gpu.cpp
# src/embedding_inplace_ops/embedding_inplace_update_gpu.cpp
src/input_combine_ops/input_combine_gpu.cpp
codegen/training/index_select/batch_index_select_dim0_host.cpp)

Expand All @@ -478,7 +431,7 @@ if(NOT FBGEMM_CPU_ONLY)
codegen/utils/embedding_bounds_check_v1.cu
codegen/utils/embedding_bounds_check_v2.cu
codegen/inference/embedding_forward_quantized_split_lookup.cu
src/embedding_inplace_ops/embedding_inplace_update.cu
# src/embedding_inplace_ops/embedding_inplace_update.cu
src/histogram_binning_calibration_ops.cu
src/input_combine_ops/input_combine.cu
src/intraining_embedding_pruning_ops/intraining_embedding_pruning.cu
Expand Down Expand Up @@ -552,7 +505,7 @@ endif()


################################################################################
# FBGEMM_GPU HIP Code Generation
# FBGEMM_GPU Generated Sources Organized
################################################################################

set(fbgemm_gpu_sources_cpu_gen
Expand Down Expand Up @@ -580,36 +533,42 @@ endif()
# FBGEMM_GPU C++ Modules
################################################################################

# Test target to demonstrate that target deps work as intended
gpu_cpp_library(
PREFIX
fbgemm_gpu
embedding_inplace_ops
TYPE
STATIC
INCLUDE_DIRS
${fbgemm_sources_include_directories}
CPU_SRCS
${fbgemm_gpu_sources_cpu_static}
${fbgemm_gpu_sources_cpu_gen}
src/embedding_inplace_ops/embedding_inplace_update_cpu.cpp
GPU_SRCS
${fbgemm_gpu_sources_gpu_static}
${fbgemm_gpu_sources_gpu_gen}
OTHER_SRCS
${asmjit_sources}
${fbgemm_sources}
src/embedding_inplace_ops/embedding_inplace_update_gpu.cpp
src/embedding_inplace_ops/embedding_inplace_update.cu
GPU_FLAGS
${TORCH_CUDA_OPTIONS})

# TODO: Test target, need to properly integrate into FBGEMM_GPU main build
gpu_cpp_library(
PREFIX
embedding_inplace_ops
fbgemm_gpu_py
TYPE
MODULE
INCLUDE_DIRS
${fbgemm_sources_include_directories}
CPU_SRCS
src/embedding_inplace_ops/embedding_inplace_update_cpu.cpp
${fbgemm_gpu_sources_cpu_static}
${fbgemm_gpu_sources_cpu_gen}
GPU_SRCS
src/embedding_inplace_ops/embedding_inplace_update_gpu.cpp
src/embedding_inplace_ops/embedding_inplace_update.cu
${fbgemm_gpu_sources_gpu_static}
${fbgemm_gpu_sources_gpu_gen}
OTHER_SRCS
${asmjit_sources}
${fbgemm_sources}
GPU_FLAGS
${TORCH_CUDA_OPTIONS})
${TORCH_CUDA_OPTIONS}
DEPS
embedding_inplace_ops)


################################################################################
Expand Down
4 changes: 3 additions & 1 deletion fbgemm_gpu/experimental/example/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ set(experimental_example_python_source_files

gpu_cpp_library(
PREFIX
fbgemm_gpu_experimental_example
fbgemm_gpu_experimental_example_py
TYPE
MODULE
INCLUDE_DIRS
${fbgemm_sources_include_directories}
GPU_SRCS
Expand Down
4 changes: 3 additions & 1 deletion fbgemm_gpu/experimental/gen_ai/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ file(GLOB_RECURSE experimental_gen_ai_python_source_files

gpu_cpp_library(
PREFIX
fbgemm_gpu_experimental_gen_ai
fbgemm_gpu_experimental_gen_ai_py
TYPE
MODULE
INCLUDE_DIRS
${fbgemm_sources_include_directories}
${CMAKE_CURRENT_SOURCE_DIR}/src/quantize
Expand Down
Loading