Skip to content

Commit

Permalink
[MXNET-372] Add build flag for USE_F16C in CMake and clarify flag in …
Browse files Browse the repository at this point in the history
…make (apache#10760)

* add comments to makefile config

* add f16c check in mxnet

* update cmake

* clarify

* small updates

* add message

* add message

* update msvc message

* update mshadow

* typo

* only print message for MSVC if USE_F16C

* improve build logic

* update mshadow

* remove def from amalgamation makefile

* trigger CI
  • Loading branch information
rahul003 authored and zheng-da committed Jun 28, 2018
1 parent 774c492 commit b00a445
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 2 deletions.
2 changes: 1 addition & 1 deletion 3rdparty/mshadow
28 changes: 28 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ mxnet_option(USE_OPENCV "Build with OpenCV support" ON)
mxnet_option(USE_OPENMP "Build with Openmp support" ON)
mxnet_option(USE_CUDNN "Build with cudnn support" ON) # one could set CUDNN_ROOT for search path
mxnet_option(USE_SSE "Build with x86 SSE instruction support" ON)
mxnet_option(USE_F16C "Build with x86 F16C instruction support" ON) # autodetects support if ON
mxnet_option(USE_LAPACK "Build with lapack support" ON IF NOT MSVC)
mxnet_option(USE_MKL_IF_AVAILABLE "Use MKL if found" ON)
mxnet_option(USE_MKLML_MKL "Use MKLDNN variant of MKL (if MKL found)" ON IF USE_MKL_IF_AVAILABLE AND UNIX AND (NOT APPLE))
Expand Down Expand Up @@ -86,6 +87,10 @@ if(MSVC)
add_definitions(-DNNVM_EXPORTS)
add_definitions(-DDMLC_STRICT_CXX11)
add_definitions(-DNOMINMAX)
# F16C is never enabled on the MSVC path: SUPPORT_F16C is forced to FALSE,
# and a notice is printed only when the user actually requested USE_F16C.
set(SUPPORT_F16C FALSE)
if(USE_F16C)
message("F16C instruction set is not yet supported for MSVC")
endif()
set(CMAKE_C_FLAGS "/MP")
set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} /bigobj")
else(MSVC)
Expand All @@ -102,6 +107,29 @@ else(MSVC)
else()
set(SUPPORT_MSSE2 FALSE)
endif()
# Decide whether the build may use the x86 F16C instruction set extension.
#
# Inputs:  USE_F16C        - user option; when OFF no detection is attempted.
# Outputs: SUPPORT_F16C    - TRUE only if USE_F16C is ON, the C++ compiler
#                            accepts -mf16c, and the probed CPU reports F16C;
#                            FALSE in every other case.
#
# The CPU probe inspects the machine that runs CMake. For cross compilation,
# turn USE_F16C off if the target device does not support F16C.
#
# Start from a known FALSE so that no stale or cached SUPPORT_F16C value can
# survive a failed detection.
set(SUPPORT_F16C FALSE)
if(USE_F16C)
  check_cxx_compiler_flag("-mf16c" COMPILER_SUPPORT_MF16C)
  if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
    # Non-empty output means a "flags" line in /proc/cpuinfo lists f16c.
    # ERROR_QUIET keeps probe failures from cluttering the configure log.
    execute_process(COMMAND cat /proc/cpuinfo
                    COMMAND grep flags
                    COMMAND grep f16c
                    OUTPUT_VARIABLE CPU_SUPPORT_F16C
                    ERROR_QUIET)
  elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
    # Non-empty output means machdep.cpu.features lists F16C.
    execute_process(COMMAND sysctl -a
                    COMMAND grep machdep.cpu.features
                    COMMAND grep F16C
                    OUTPUT_VARIABLE CPU_SUPPORT_F16C
                    ERROR_QUIET)
  endif()
  if(NOT CPU_SUPPORT_F16C)
    message("CPU does not support F16C instructions")
  endif()
  # Both the CPU and the compiler must support F16C before it is enabled.
  if(CPU_SUPPORT_F16C AND COMPILER_SUPPORT_MF16C)
    set(SUPPORT_F16C TRUE)
  endif()
endif()
set(CMAKE_C_FLAGS "-Wall -Wno-unknown-pragmas -fPIC -Wno-sign-compare")
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES ".*Clang$")
set(CMAKE_C_FLAGS "-Wno-braced-scalar-init")
Expand Down
24 changes: 24 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,30 @@ ifeq ($(USE_CUDNN), 1)
LDFLAGS += -lcudnn
endif

# F16C instruction set extension: enables fast fp16 compute on CPU.
# When USE_F16C is unset or empty, the CPU of the build machine is probed and
# USE_F16C is set to 1 if F16C is reported, 0 otherwise.
# If cross compiling, you may want to set USE_F16C=0 explicitly when the
# target system does not support F16C.
ifndef USE_F16C
  ifneq ($(OS),Windows_NT)
    detected_OS := $(shell uname -s)
    ifeq ($(detected_OS),Linux)
      F16C_SUPP = $(shell cat /proc/cpuinfo | grep flags | grep f16c)
    else ifeq ($(detected_OS),Darwin)
      F16C_SUPP = $(shell sysctl -a | grep machdep.cpu.features | grep F16C)
    endif
    # An empty probe result means F16C was not reported, or the OS is not one
    # of the probed ones.
    ifeq ($(strip $(F16C_SUPP)),)
      USE_F16C=0
    else
      USE_F16C=1
    endif
  endif
  # On Windows nothing is probed: check yourself whether your processor and
  # compiler support the F16C architecture. One way to check the processor is
  # the Coreinfo tool:
  # https://docs.microsoft.com/en-us/sysinternals/downloads/coreinfo.
  # If `coreinfo -c` shows F16C and the compiler supports it, you can set
  # USE_F16C=1 explicitly to leverage that capability.
endif

# gperftools malloc library (tcmalloc)
ifeq ($(USE_GPERFTOOLS), 1)
# FIND_LIBNAME=tcmalloc_and_profiler
Expand Down
1 change: 0 additions & 1 deletion amalgamation/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ CFLAGS=-std=c++11 -Wno-unknown-pragmas -Wall $(DEFS)
# if architecture of the CPU supports F16C instruction set, enable USE_F16C for fast fp16 computation on CPU
ifeq ($(USE_F16C), 1)
CFLAGS+=-mf16c
DEFS+=-DMSHADOW_USE_F16C=1
else
DEFS+=-DMSHADOW_USE_F16C=0
endif
Expand Down
9 changes: 9 additions & 0 deletions make/config.mk
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,19 @@ endif
# Capture the `uname -a` words of the build machine; the machine architecture
# is one of them.
ARCH := $(shell uname -a)
# x86-only instruction set extensions (SSE, F16C) are switched off when one of
# the listed ARM/POWER architecture names appears; otherwise SSE is enabled.
ifeq (,$(filter $(ARCH), armv6l armv7l powerpc64le ppc64le aarch64))
  USE_SSE=1
else
  USE_SSE=0
  USE_F16C=0
endif

#----------------------------
# F16C instruction support for faster arithmetic of fp16 on CPU
#----------------------------
# For distributed training with fp16, this helps even if training on GPUs.
# If left empty, the build checks CPU support and turns it on when available.
# For cross compilation, please check support for F16C on the target device
# and turn it off if necessary.
# `?=` is used so that a value assigned earlier in this file (USE_F16C=0 for
# ARM/POWER architectures) is kept instead of being reset to empty here.
USE_F16C ?=

#----------------------------
# distributed computing
#----------------------------
Expand Down
3 changes: 3 additions & 0 deletions make/crosscompile.jetson.mk
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,10 @@ endif
# Settings for power and arm arch
#----------------------------
USE_SSE=0

# Turn off F16C instruction set support
USE_F16C=0

#----------------------------
# distributed computing
#----------------------------
Expand Down

0 comments on commit b00a445

Please sign in to comment.