Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature : Add SYCL runtime support #747

Open
wants to merge 69 commits into
base: sycl-refactor
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
69 commits
Select commit Hold shift + click to select a range
75d9277
init kernel headers
abhilash1910 Sep 5, 2023
279fce4
modify kernel header
abhilash1910 Sep 20, 2023
d9dcee9
modify headers
abhilash1910 Sep 21, 2023
ddae8ab
update kernels
abhilash1910 Sep 22, 2023
adbffb7
Merge branch 'TimDettmers:main' into sycl
abhilash1910 Nov 3, 2023
26ad690
Merge branch 'TimDettmers:main' into sycl
abhilash1910 Mar 12, 2024
1f2c8bc
add partial kernels
abhilash1910 Mar 20, 2024
dd935ad
add quant kernels
abhilash1910 Mar 20, 2024
d93fc2e
add precondition optimizer kernel sycl
abhilash1910 Mar 21, 2024
29e895f
add koptimizer 8bits sycl
abhilash1910 Mar 21, 2024
98bd39b
add row col transform kernels
abhilash1910 Mar 21, 2024
8e15d5a
add gemm kernels
abhilash1910 Mar 22, 2024
bd9e829
fix header path
abhilash1910 Mar 22, 2024
45318d6
fix handler kernels
abhilash1910 Mar 22, 2024
c9d606b
integrate kenrels with ops on kquant
abhilash1910 Mar 30, 2024
adda60d
add 8 bit opt with ops
abhilash1910 Apr 1, 2024
beffe78
add 8 bit 2 state adam kernel
abhilash1910 Apr 1, 2024
ed431fe
8 bit optimizer ops integrate
abhilash1910 Apr 1, 2024
7ab3273
add all opt & quant kernels
abhilash1910 Apr 1, 2024
43a1899
add shared vars
abhilash1910 Apr 1, 2024
38c2f8c
add shared for spm
abhilash1910 Apr 1, 2024
04a3ba1
add gemm ops + kernel fix
abhilash1910 Apr 1, 2024
06ba221
update kernel headers
abhilash1910 Apr 2, 2024
ee7997d
port transform kernels
abhilash1910 Apr 3, 2024
26863ea
use ldg & sparse csr
abhilash1910 Apr 4, 2024
caa72f6
add dnn prototype
abhilash1910 Apr 8, 2024
46f1f85
dnn kernel
abhilash1910 Apr 8, 2024
a94c253
add cmake initial
abhilash1910 Apr 9, 2024
89fb73b
cmake fix
abhilash1910 Apr 9, 2024
2644374
fix build
abhilash1910 Apr 9, 2024
14f20e4
fix build
abhilash1910 Apr 9, 2024
d151d0c
fix build
abhilash1910 Apr 10, 2024
dfcd9d8
fix dnnl
abhilash1910 Apr 10, 2024
886751c
refactor with new api and use accessor
abhilash1910 May 10, 2024
f90c06d
full 32 optimizer fixed update
abhilash1910 May 13, 2024
873eadb
refine 8 bit optimizers
abhilash1910 May 13, 2024
f71243b
refine 8 bit blockwise opt
abhilash1910 May 13, 2024
493a5ec
refine k quantize blockwise
abhilash1910 May 14, 2024
760a120
refine k quantize
abhilash1910 May 14, 2024
c7d8326
refine percentile clipping
abhilash1910 May 14, 2024
693ca79
refine estimate quantiles and k dequantize & headers
abhilash1910 May 14, 2024
3a41f96
fix errors in k compress
abhilash1910 May 15, 2024
37897ee
refine template types for k quants
abhilash1910 May 15, 2024
7ddce74
remove mma header
abhilash1910 May 15, 2024
c039aa1
refine historgram
abhilash1910 May 15, 2024
b9e9a9c
refine row stats
abhilash1910 May 16, 2024
c8473b5
refine double row col quants
abhilash1910 May 16, 2024
c85baf9
refine helper functions
abhilash1910 May 16, 2024
922786e
complete refine non gemm kernels
abhilash1910 May 16, 2024
ee8225e
fix extract function
abhilash1910 May 16, 2024
49ca1d7
refine igemmlt kernels
abhilash1910 May 20, 2024
342f95c
refine dnn and gemm 4 bit kernels
abhilash1910 May 21, 2024
5e2611a
refine spm experimental
abhilash1910 May 21, 2024
6af46e2
fix host dereference issue on nv dequant & kquant
abhilash1910 May 26, 2024
bcad0ea
fix header name
abhilash1910 Jun 6, 2024
6d3ed26
fix nv host dereference issue on 8 bit
abhilash1910 Jun 6, 2024
4f19d41
fix nv issue k quants
abhilash1910 Jun 6, 2024
dd709ca
fix nv issue on row col quant
abhilash1910 Jun 6, 2024
0fbedfe
fix nv issue on 32 bit
abhilash1910 Jun 7, 2024
38312cb
fix nv issue on k dequant blockwise
abhilash1910 Jun 7, 2024
1d6f56b
fix nv issue on gemms and some quant kernels
abhilash1910 Jun 7, 2024
b107b9c
refine
abhilash1910 Jun 7, 2024
7838151
refine
abhilash1910 Jun 7, 2024
46d10ba
refine
abhilash1910 Jun 7, 2024
8795844
add dnn build flag
abhilash1910 Jun 7, 2024
7007a02
update dnn linkage
abhilash1910 Jun 27, 2024
50a2838
Merge branch 'sycl-refactor' into sycl
abhilash1910 Jul 29, 2024
6f8ef48
fix cmake
abhilash1910 Aug 7, 2024
a425d44
upgrade to 2024.2 Intel LLVM compiler release, set Nvidia build flag
abhilash1910 Aug 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 49 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
# For GCC: `cmake -B build . && cmake --build build`
# For MSVC: `cmake -B build . && cmake --build build --config Release`
# You can also use the following options and variables
# - COMPUTE_BACKEND: Set to `cpu`, `cuda`, or `mps` to select the backend
# - COMPUTE_BACKEND: Set to `cpu`, `cuda`, `mps`, or `sycl` to select the backend
# - NO_CUBLASLT: Default OFF, will skip building/linking CUBLASLT support
# - CUDA_VERSION: The expected CUDA version, for sanity checking. The actual version
# is whatever CMake finds on your path.
# - COMPUTE_CAPABILITY: Which GPU Arch/Compute codes to provide to NVCC.
# Separate by semicolons, i.e. `-DCOMPUTE_CAPABILITY=89;90`
# Check your compute capability here: https://developer.nvidia.com/cuda-gpus
# - PTXAS_VERBOSE: Pass the `-v` option to the PTX Assembler
cmake_minimum_required(VERSION 3.22.1)
cmake_minimum_required(VERSION 3.20.4)

project(bitsandbytes LANGUAGES CXX)

Expand All @@ -24,15 +24,18 @@ if(NOT CMAKE_BUILD_TYPE)
endif()

# Define included source files
set(CPP_FILES csrc/common.cpp csrc/cpu_ops.cpp csrc/pythonInterface.cpp)
# Backend-independent sources. CPP_FILES is the only list that is appended to
# SRC_FILES unconditionally, so the Python-facing C interface
# (csrc/pythonInterface.cpp) has to live here; otherwise the cpu, cuda and mps
# builds would not compile it at all.
set(CPP_FILES csrc/common.cpp csrc/cpu_ops.cpp csrc/pythonInterface.cpp)
# Backend-specific sources; each list is only added by its own backend branch.
set(CUDA_FILES csrc/ops.cu csrc/kernels.cu)
set(MPS_FILES csrc/mps_ops.mm)
set(METAL_FILES csrc/mps_kernels.metal)
# pythonInterface.cpp is intentionally not repeated here: the SYCL branch
# appends SYCL_FILES on top of CPP_FILES, which already contains it.
set(SYCL_FILES csrc/sycl/kernels.cpp csrc/sycl/ops.cpp)
#set(SYCL_FILES csrc/sycl/kernel_gemm.cpp csrc/sycl/op_gemm.cpp csrc/sycl/kernel_quant.cpp csrc/sycl/op_quant.cpp)

# C++ sources are always included
list(APPEND SRC_FILES ${CPP_FILES})

set(COMPUTE_BACKEND "cpu" CACHE STRING "The compute backend to use (cpu, cuda, mps)")
set_property(CACHE COMPUTE_BACKEND PROPERTY STRINGS cpu cuda mps)
set(COMPUTE_BACKEND "cpu" CACHE STRING "The compute backend to use (cpu, cuda, mps, sycl)")
set_property(CACHE COMPUTE_BACKEND PROPERTY STRINGS cpu cuda mps sycl)
option(PTXAS_VERBOSE "Pass through -v flag to PTX Assembler" OFF)

if(APPLE)
Expand All @@ -50,16 +53,23 @@ if(${COMPUTE_BACKEND} STREQUAL "cuda")
option(NO_CUBLASLT "Disable CUBLAS" OFF)
set(BUILD_CUDA ON)
set(BUILD_MPS OFF)
set(BUILD_SYCL OFF)
message(STATUS "NO_CUBLASLT := ${NO_CUBLASLT}")
elseif(${COMPUTE_BACKEND} STREQUAL "mps")
if(NOT APPLE)
message(FATAL_ERROR "MPS is only supported on macOS" )
endif()
set(BUILD_CUDA OFF)
set(BUILD_MPS ON)
set(BUILD_SYCL OFF)
elseif(${COMPUTE_BACKEND} STREQUAL "sycl")
set(BUILD_CUDA OFF)
set(BUILD_SYCL ON)
set(BUILD_MPS OFF)
else()
set(BUILD_CUDA OFF)
set(BUILD_MPS OFF)
set(BUILD_SYCL OFF)
endif()


Expand Down Expand Up @@ -177,12 +187,31 @@ elseif(BUILD_MPS)
COMMENT "Compiling Metal kernels"
VERBATIM)
add_custom_target(metallib DEPENDS "bitsandbytes/bitsandbytes.metallib")
elseif(BUILD_SYCL)
# SYCL backend configuration: requires a sourced oneAPI environment, locates
# the Intel SYCL package, and appends the SYCL compile flags and sources.
if(NOT DEFINED ENV{ONEAPI_ROOT})
  message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh")
endif()
find_package(IntelSYCL REQUIRED)
set(CMAKE_CXX_STANDARD 17)

# MKLROOT is used for the library search path below but is not set anywhere as
# a CMake variable. Honour an explicit -DMKLROOT=..., and otherwise fall back
# to the MKLROOT environment variable (presumably exported by the oneAPI
# environment script -- confirm for your installation).
if(NOT DEFINED MKLROOT AND DEFINED ENV{MKLROOT})
  set(MKLROOT "$ENV{MKLROOT}")
endif()

add_compile_options(-I./) #include DPCT
# No extra "/" before the variable: a leading slash would corrupt the path if
# SYCL_INCLUDE_DIR is an absolute path (TODO confirm it always is).
add_compile_options("-I${SYCL_INCLUDE_DIR}")

string(APPEND CMAKE_CXX_FLAGS " -Wno-narrowing")
string(APPEND CMAKE_CXX_FLAGS " -O3")
string(APPEND CMAKE_CXX_FLAGS " -fsycl -L${MKLROOT}/lib")
# Select the device target for the SYCL compiler. The MKL search path was
# already added above, so it is not repeated for the INTEL target.
if("${SYCL_TARGET}" STREQUAL "INTEL")
  string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=spir64")
elseif("${SYCL_TARGET}" STREQUAL "NVIDIA")
  string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=nvptx64-nvidia-cuda")
endif()
# NOTE(review): -w silences every compiler warning, which also makes the
# -Wno-narrowing above redundant; kept to preserve the existing build output.
string(APPEND CMAKE_CXX_FLAGS " -w")
list(APPEND SRC_FILES ${SYCL_FILES})

else()
string(APPEND BNB_OUTPUT_NAME "_cpu")
set(GPU_SOURCES)
endif()



if(WIN32)
# Export all symbols
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
Expand All @@ -195,9 +224,12 @@ endif()

set_source_files_properties(${CPP_FILES} PROPERTIES LANGUAGE CXX)
add_library(bitsandbytes SHARED ${SRC_FILES})
target_compile_features(bitsandbytes PUBLIC cxx_std_14)
target_include_directories(bitsandbytes PUBLIC csrc include)

# Minimum C++ standard for the library: C++17 when the SYCL backend is being
# built, C++14 for every other backend.
set(bnb_cxx_std_feature cxx_std_14)
if(BUILD_SYCL)
  set(bnb_cxx_std_feature cxx_std_17)
endif()
target_compile_features(bitsandbytes PUBLIC ${bnb_cxx_std_feature})
target_include_directories(bitsandbytes PUBLIC csrc csrc/sycl include)

if(BUILD_CUDA)
target_include_directories(bitsandbytes PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
Expand All @@ -218,6 +250,13 @@ if(BUILD_MPS)
target_link_libraries(bitsandbytes objc "-framework Foundation" "-framework Metal" "-framework MetalPerformanceShaders" "-framework MetalPerformanceShadersGraph")
endif()

# Link the libraries required by the SYCL backend. The set depends on
# SYCL_TARGET, which must be either INTEL or NVIDIA.
if(BUILD_SYCL)
  if("${SYCL_TARGET}" STREQUAL "INTEL")
    target_link_libraries(bitsandbytes PUBLIC OpenCL mkl_core pthread m dl mkl_intel_ilp64 mkl_tbb_thread dnnl)
  elseif("${SYCL_TARGET}" STREQUAL "NVIDIA")
    target_link_libraries(bitsandbytes PUBLIC onemkl pthread m dl)
  else()
    # Previously this case was silent: no libraries were linked at all, which
    # is easy to miss at configure time.
    message(WARNING "SYCL_TARGET is '${SYCL_TARGET}'; expected INTEL or NVIDIA. No SYCL math/DNN libraries will be linked.")
  endif()
endif()
if(WIN32)
set_target_properties(bitsandbytes PROPERTIES PREFIX "lib")
endif()
Expand Down
Loading