Commit d86a64a

Add CPU CMake extension.

bigPYJ1151 committed Mar 19, 2024
1 parent b37cdce commit d86a64a
Showing 3 changed files with 108 additions and 4 deletions.
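The switch is driven by a single environment variable, VLLM_TARGET_DEVICE, which setup.py reads (defaulting to "cuda") and forwards to CMake. A minimal sketch of how one might trigger the new CPU build path (a hypothetical driver script, not part of this commit):

import os
import subprocess

# Hypothetical driver (not part of the commit): set the variable that
# setup.py reads via os.getenv("VLLM_TARGET_DEVICE", "cuda"), then run the
# editable install that invokes the CMake build.
env = dict(os.environ, VLLM_TARGET_DEVICE="cpu")
subprocess.check_call(["pip", "install", "-e", "."], env=env)

The shell equivalent is simply VLLM_TARGET_DEVICE=cpu pip install -e . run from the repository root.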
16 changes: 16 additions & 0 deletions CMakeLists.txt
@@ -2,7 +2,10 @@ cmake_minimum_required(VERSION 3.21)

project(vllm_extensions LANGUAGES CXX)

set(VLLM_TARGET_DEVICE "cuda" CACHE STRING "Target device backend for vLLM")

message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
message(STATUS "Target device: ${VLLM_TARGET_DEVICE}")

include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)

@@ -69,6 +72,19 @@ find_package(Torch REQUIRED)
#
append_torchlib_if_found(torch_python)

#
# Forward the non-CUDA device extensions to external CMake scripts.
#
if (NOT VLLM_TARGET_DEVICE STREQUAL "cuda" AND
    NOT VLLM_TARGET_DEVICE STREQUAL "rocm")
    if (VLLM_TARGET_DEVICE STREQUAL "cpu")
        include(${CMAKE_CURRENT_LIST_DIR}/cmake/cpu_extension.cmake)
    else()
        message(FATAL_ERROR "Unsupported vLLM target device: ${VLLM_TARGET_DEVICE}")
    endif()
    return()
endif()

#
# Set up GPU language and check the torch version and warn if it isn't
# what is expected.
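
Note the return() above: for a non-GPU target, CMake includes the device-specific script and stops processing CMakeLists.txt, so none of the CUDA/ROCm setup below it ever runs. setup.py (the third file in this diff) forwards the device as a CMake define; a hypothetical direct configure call, with an illustrative build directory, would be:

import subprocess

# Hypothetical direct CMake configure (illustrative only): the define makes
# CMakeLists.txt include cmake/cpu_extension.cmake and return() before any
# GPU setup.
subprocess.check_call([
    "cmake", "-S", ".", "-B", "build",
    "-DVLLM_TARGET_DEVICE=cpu",
    "-DCMAKE_BUILD_TYPE=Release",
])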
77 changes: 77 additions & 0 deletions cmake/cpu_extension.cmake
@@ -0,0 +1,77 @@
#
# Check the compile flags
#
list(APPEND CXX_COMPILE_FLAGS
    "-fopenmp"
    "-DVLLM_CPU_EXTENSION")

execute_process(COMMAND cat /proc/cpuinfo
                RESULT_VARIABLE CPUINFO_RET
                OUTPUT_VARIABLE CPUINFO)

if (NOT CPUINFO_RET EQUAL 0)
    message(FATAL_ERROR "Failed to check CPU features via /proc/cpuinfo")
endif()

function (find_isa CPUINFO TARGET OUT)
    string(FIND ${CPUINFO} ${TARGET} ISA_FOUND)
    if(NOT ISA_FOUND EQUAL -1)
        set(${OUT} ON PARENT_SCOPE)
    else()
        set(${OUT} OFF PARENT_SCOPE)
    endif()
endfunction()

find_isa(${CPUINFO} "avx512f" AVX512_FOUND)

if (AVX512_FOUND)
    list(APPEND CXX_COMPILE_FLAGS
        "-mavx512f"
        "-mavx512vl"
        "-mavx512bw"
        "-mavx512dq")

    find_isa(${CPUINFO} "avx512_bf16" AVX512BF16_FOUND)
    if (AVX512BF16_FOUND AND
        CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND
        CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.3)
        list(APPEND CXX_COMPILE_FLAGS "-mavx512bf16")
    else()
        message(WARNING "Disabling AVX512-BF16 ISA support; requires gcc/g++ >= 12.3")
    endif()
else()
    message(FATAL_ERROR "vLLM CPU backend requires AVX512 ISA support.")
endif()

message(STATUS "CPU extension compile flags: ${CXX_COMPILE_FLAGS}")

#
# Define extension targets
#

#
# _C extension
#
set(VLLM_EXT_SRC
    "csrc/cpu/activation.cpp"
    "csrc/cpu/attention.cpp"
    "csrc/cpu/cache.cpp"
    "csrc/cpu/layernorm.cpp"
    "csrc/cpu/pos_encoding.cpp"
    "csrc/pybind.cpp")

define_gpu_extension_target(
    _C
    DESTINATION vllm
    LANGUAGE CXX
    SOURCES ${VLLM_EXT_SRC}
    COMPILE_FLAGS ${CXX_COMPILE_FLAGS}
    WITH_SOABI
)

add_custom_target(default)
message(STATUS "Enabling C extension.")
add_dependencies(default _C)
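
The ISA detection in this file is a plain substring search over /proc/cpuinfo. For readers unfamiliar with CMake's string(FIND ...), an equivalent Python sketch (illustrative only, not part of the diff):

# Illustrative Python equivalent of the find_isa helper above.
def find_isa(cpuinfo: str, target: str) -> bool:
    # string(FIND ...) yields -1 when the substring is absent; `in` is the same test.
    return target in cpuinfo

with open("/proc/cpuinfo") as f:
    cpuinfo = f.read()

print(find_isa(cpuinfo, "avx512f"))      # gates the whole CPU backend
print(find_isa(cpuinfo, "avx512_bf16"))  # gates -mavx512bf16 (needs gcc/g++ >= 12.3)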

19 changes: 15 additions & 4 deletions setup.py
@@ -13,6 +13,8 @@
from torch.utils.cpp_extension import CUDA_HOME

ROOT_DIR = os.path.dirname(__file__)
# Target device of vLLM, supporting [cuda (by default), rocm, neuron, cpu]
VLLM_TARGET_DEVICE = os.getenv("VLLM_TARGET_DEVICE", "cuda")

# vLLM only supports Linux platform
assert sys.platform.startswith(
@@ -61,8 +63,7 @@ def compute_num_jobs(self):
        except AttributeError:
            num_jobs = os.cpu_count()

-        nvcc_cuda_version = get_nvcc_cuda_version()
-        if nvcc_cuda_version >= Version("11.2"):
+        if _is_cuda() and get_nvcc_cuda_version() >= Version("11.2"):
            nvcc_threads = int(os.getenv("NVCC_THREADS", 8))
            num_jobs = max(1, round(num_jobs / (nvcc_threads / 4)))
        else:
@@ -95,6 +96,7 @@ def configure(self, ext: CMakeExtension) -> None:
            '-DCMAKE_BUILD_TYPE={}'.format(cfg),
            '-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={}'.format(outdir),
            '-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY={}'.format(self.build_temp),
+           '-DVLLM_TARGET_DEVICE={}'.format(VLLM_TARGET_DEVICE),
        ]

        verbose = bool(int(os.getenv('VERBOSE', '0')))
@@ -168,11 +170,12 @@ def build_extensions(self) -> None:


def _is_cuda() -> bool:
-    return torch.version.cuda is not None
+    return VLLM_TARGET_DEVICE == "cuda" and torch.version.cuda is not None


def _is_hip() -> bool:
-    return torch.version.hip is not None
+    return (VLLM_TARGET_DEVICE == "cuda"
+            or VLLM_TARGET_DEVICE == "rocm") and torch.version.hip is not None


def _is_neuron() -> bool:
@@ -184,6 +187,10 @@ def _is_neuron() -> bool:
    return torch_neuronx_installed


+def _is_cpu() -> bool:
+    return VLLM_TARGET_DEVICE == "cpu"


def _install_punica() -> bool:
    return bool(int(os.getenv("VLLM_INSTALL_PUNICA_KERNELS", "0")))

@@ -279,6 +286,8 @@ def get_vllm_version() -> str:
        if neuron_version != MAIN_CUDA_VERSION:
            neuron_version_str = neuron_version.replace(".", "")[:3]
            version += f"+neuron{neuron_version_str}"
+   elif _is_cpu():
+       version += "+cpu"
    else:
        raise RuntimeError("Unknown runtime environment")
@@ -311,6 +320,8 @@ def get_requirements() -> List[str]:
    elif _is_neuron():
        with open(get_path("requirements-neuron.txt")) as f:
            requirements = f.read().strip().split("\n")
+   elif _is_cpu():
+       requirements = []
    else:
        raise ValueError(
            "Unsupported platform, please use CUDA, ROCM or Neuron.")