Skip to content

Commit

Permalink
GH-23221: [C++] Add support for building with Emscripten (#37821)
Browse files Browse the repository at this point in the history
Split from #37696 

This contains only the CMake changes needed to enable building with Emscripten.

Changes are:
1) Support for target system "emscripten"
2) CMake preset for building libarrow, `ninja-release-python-emscripten` (same as `ninja-release-python`, but with Emscripten support)
3) Override file for CMake on Emscripten, which sets the build parameters that are required for the build to succeed on that platform.
4) Changes in the pyarrow CMake file so that it works when libarrow is built as a shared library, plus an option that makes that CMake file only dump the current Arrow configuration, which is useful for cross-compiled builds.

* Closes: #23221

Lead-authored-by: Joe Marshall <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Co-authored-by: Joris Van den Bossche <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
  • Loading branch information
4 people authored Apr 5, 2024
1 parent 72d20ad commit 1a1d2c8
Show file tree
Hide file tree
Showing 31 changed files with 640 additions and 15 deletions.
16 changes: 15 additions & 1 deletion ci/docker/ubuntu-22.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ RUN latest_system_llvm=14 && \
RUN apt-get update -y -q && \
apt-get install -y -q --no-install-recommends \
autoconf \
bzip2 \
ca-certificates \
ccache \
cmake \
Expand Down Expand Up @@ -115,10 +116,20 @@ RUN apt-get update -y -q && \
rapidjson-dev \
rsync \
tzdata \
wget && \
wget \
xz-utils && \
apt-get clean && \
rm -rf /var/lib/apt/lists*

# Install the Emscripten toolchain through the EMSDK manager: clone the SDK
# into ~/emsdk, then install and activate the release selected by the
# emscripten_version build argument.
ARG emscripten_version="3.1.45"
RUN git clone https://github.com/emscripten-core/emsdk.git ~/emsdk && \
    cd ~/emsdk && \
    ./emsdk install ${emscripten_version} && \
    ./emsdk activate ${emscripten_version} && \
    echo "Installed emsdk to:" ~/emsdk


ARG gcc_version=""
RUN if [ "${gcc_version}" = "" ]; then \
apt-get update -y -q && \
Expand Down Expand Up @@ -151,6 +162,9 @@ RUN if [ "${gcc_version}" = "" ]; then \
update-alternatives --set c++ /usr/bin/g++; \
fi

# Load the EMSDK environment and build zlib with the --pic option so that
# zlib is already cached in the EMSDK folder of the image.
# NOTE(review): `source` is not provided by a plain POSIX sh - confirm that
# this Dockerfile selects bash as its shell before this step.
RUN source ~/emsdk/emsdk_env.sh && embuilder --pic build zlib

COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_minio.sh latest /usr/local

Expand Down
20 changes: 20 additions & 0 deletions ci/scripts/cpp_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,25 @@ esac
mkdir -p ${build_dir}
pushd ${build_dir}

if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then
# The Emscripten build is not supported on the Ubuntu 20.04 image: report the
# problem on stderr and stop. Exit statuses must be in the range 0-255, so a
# plain failure status of 1 is used instead of "-1".
if [ "${UBUNTU}" = "20.04" ]; then
  echo "arrow emscripten build is not supported on Ubuntu 20.04, run with UBUNTU=22.04" >&2
  exit 1
fi
n_jobs=2 # Emscripten build fails on docker unless this is set really low
source ~/emsdk/emsdk_env.sh
emcmake cmake \
--preset=ninja-${ARROW_BUILD_TYPE:-debug}-emscripten \
-DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE:-OFF} \
-DCMAKE_C_FLAGS="${CFLAGS:-}" \
-DCMAKE_CXX_FLAGS="${CXXFLAGS:-}" \
-DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD:-17}" \
-DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR:-lib} \
-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}} \
-DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \
${ARROW_CMAKE_ARGS} \
${source_dir}
else
cmake \
-Dabsl_SOURCE=${absl_SOURCE:-} \
-DARROW_ACERO=${ARROW_ACERO:-OFF} \
Expand Down Expand Up @@ -198,6 +217,7 @@ cmake \
-G "${CMAKE_GENERATOR:-Ninja}" \
${ARROW_CMAKE_ARGS} \
${source_dir}
fi

# Use one more parallel build job than n_jobs unless the caller already set
# CMAKE_BUILD_PARALLEL_LEVEL. The $(( )) arithmetic expansion replaces the
# deprecated $[ ] form and yields the same value.
export CMAKE_BUILD_PARALLEL_LEVEL=${CMAKE_BUILD_PARALLEL_LEVEL:-$((n_jobs + 1))}
# Build everything and install it, reporting how long the build took.
time cmake --build . --target install
Expand Down
4 changes: 4 additions & 0 deletions ci/scripts/cpp_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ case "$(uname)" in
;;
esac

# When ARROW_EMSCRIPTEN is ON (it defaults to OFF when unset), force n_jobs
# down to 1 for the rest of this script.
if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then
n_jobs=1 # avoid spurious fails on emscripten due to loading too many big executables
fi

pushd ${build_dir}

if [ -z "${PYTHON}" ] && ! which python > /dev/null 2>&1; then
Expand Down
Empty file modified ci/scripts/go_bench.sh
100644 → 100755
Whitespace-only changes.
44 changes: 44 additions & 0 deletions cpp/CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,32 @@
"CMAKE_BUILD_TYPE": "RelWithDebInfo"
}
},
{
"name": "features-emscripten",
"hidden": true,
"cacheVariables": {
"ARROW_ACERO": "ON",
"ARROW_BUILD_SHARED": "OFF",
"ARROW_BUILD_STATIC": "ON",
"ARROW_CUDA": "OFF",
"ARROW_DEPENDENCY_SOURCE": "BUNDLED",
"ARROW_DEPENDENCY_USE_SHARED": "OFF",
"ARROW_ENABLE_THREADING": "OFF",
"ARROW_FLIGHT": "OFF",
"ARROW_IPC": "ON",
"ARROW_JEMALLOC": "OFF",
"ARROW_MIMALLOC": "OFF",
"ARROW_ORC": "ON",
"ARROW_RUNTIME_SIMD_LEVEL": "NONE",
"ARROW_S3": "OFF",
"ARROW_SIMD_LEVEL": "NONE",
"ARROW_SUBSTRAIT": "ON",
"ARROW_WITH_BROTLI": "ON",
"ARROW_WITH_OPENTELEMETRY": "OFF",
"ARROW_WITH_SNAPPY": "ON",
"CMAKE_C_BYTE_ORDER": "LITTLE_ENDIAN"
}
},
{
"name": "features-minimal",
"hidden": true,
Expand Down Expand Up @@ -341,6 +367,24 @@
"displayName": "Release build with CUDA integration",
"cacheVariables": {}
},
{
"name": "ninja-debug-emscripten",
"inherits": [
"features-emscripten",
"base-debug"
],
"displayName": "Debug build which builds an Emscripten library",
"cacheVariables": {}
},
{
"name": "ninja-release-emscripten",
"inherits": [
"features-emscripten",
"base-release"
],
"displayName": "Release build which builds an Emscripten library",
"cacheVariables": {}
},
{
"name": "ninja-release-flight",
"inherits": [
Expand Down
24 changes: 24 additions & 0 deletions cpp/build-support/emscripten-test-init.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Emscripten module configuration for the test executables.
var Module = {
  // Before the program starts, copy the Parquet and Arrow test data locations
  // from the node process environment into the environment that the compiled
  // tests read, so they can find the current test data files.
  preRun: () => {
    for (const name of ["PARQUET_TEST_DATA", "ARROW_TEST_DATA"]) {
      ENV[name] = process.env[name];
    }
  },
};
4 changes: 2 additions & 2 deletions cpp/cmake_modules/BuildUtils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -760,8 +760,8 @@ function(ADD_TEST_CASE REL_TEST_NAME)
valgrind --suppressions=valgrind.supp --tool=memcheck --gen-suppressions=all \
--num-callers=500 --leak-check=full --leak-check-heuristics=stdstring \
--error-exitcode=1 ${TEST_PATH} ${ARG_TEST_ARGUMENTS}")
elseif(WIN32)
add_test(${TEST_NAME} ${TEST_PATH} ${ARG_TEST_ARGUMENTS})
elseif(WIN32 OR CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
add_test(NAME ${TEST_NAME} COMMAND ${TEST_NAME} ${ARG_TEST_ARGUMENTS})
else()
add_test(${TEST_NAME}
${BUILD_SUPPORT_DIR}/run-test.sh
Expand Down
65 changes: 64 additions & 1 deletion cpp/cmake_modules/SetupCxxFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ include(CheckCXXSourceCompiles)
message(STATUS "System processor: ${CMAKE_SYSTEM_PROCESSOR}")

if(NOT DEFINED ARROW_CPU_FLAG)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64|amd64|X86|x86|i[3456]86|x64")
if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
set(ARROW_CPU_FLAG "emscripten")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64|amd64|X86|x86|i[3456]86|x64")
set(ARROW_CPU_FLAG "x86")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|ARM64|arm64")
set(ARROW_CPU_FLAG "aarch64")
Expand Down Expand Up @@ -312,7 +314,12 @@ if("${BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wextra")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wdocumentation")
# -Wshorten-64-to-32 is enabled everywhere except on Emscripten: size_t is
# 32 bit on wasm32, so the conversion warnings are explicitly disabled there.
if(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
  string(APPEND CXX_COMMON_FLAGS " -Wshorten-64-to-32")
else()
  string(APPEND CXX_COMMON_FLAGS " -Wno-shorten-64-to-32")
endif()
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-missing-braces")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unused-parameter")
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-constant-logical-operand")
Expand Down Expand Up @@ -692,19 +699,38 @@ if(NOT MSVC)
set(C_DEBUG_FLAGS "")
set(CXX_DEBUG_FLAGS "")
# Select the debug-info and optimization flags for non-MSVC compilers.
if(NOT MSVC)
  if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
    # With -g Emscripten uses DWARF debug info, which is really slow to build
    # (and uses tons of memory). Remove the -g options from the default debug
    # flags and use -g2 instead.
    #
    # The inputs are quoted so that an empty CMAKE_<LANG>_FLAGS_DEBUG does not
    # make string() fail because of a missing argument. Whole options are
    # matched so that e.g. "-g3" or "-ggdb" is removed completely instead of
    # leaving a stray "3" or "gdb" behind, and so that options which merely
    # contain "-g" (such as "-Wno-gnu-...") are left alone.
    string(REGEX REPLACE "(^| )-g[^ ]*" " " CMAKE_CXX_FLAGS_DEBUG
                         "${CMAKE_CXX_FLAGS_DEBUG}")
    string(REGEX REPLACE "(^| )-g[^ ]*" " " CMAKE_C_FLAGS_DEBUG
                         "${CMAKE_C_FLAGS_DEBUG}")
    string(APPEND C_DEBUG_FLAGS " -g2")
    string(APPEND CXX_DEBUG_FLAGS " -g2")
    string(APPEND C_RELWITHDEBINFO_FLAGS " -g2")
    string(APPEND CXX_RELWITHDEBINFO_FLAGS " -g2")
    # Without -O1, Emscripten executables are *MASSIVE*. Don't use -O0, but
    # respect an optimization level that is already part of the debug flags.
    if(NOT CMAKE_C_FLAGS_DEBUG MATCHES "-O")
      string(APPEND C_DEBUG_FLAGS " -O1")
    endif()
    if(NOT CMAKE_CXX_FLAGS_DEBUG MATCHES "-O")
      string(APPEND CXX_DEBUG_FLAGS " -O1")
    endif()
  else()
    # Default to -O0 for debug builds unless an optimization level was
    # already requested in the debug flags.
    if(NOT CMAKE_C_FLAGS_DEBUG MATCHES "-O")
      string(APPEND C_DEBUG_FLAGS " -O0")
    endif()
    if(NOT CMAKE_CXX_FLAGS_DEBUG MATCHES "-O")
      string(APPEND CXX_DEBUG_FLAGS " -O0")
    endif()

    # Optionally add -ggdb to the debug and RelWithDebInfo flags.
    if(ARROW_GGDB_DEBUG)
      string(APPEND C_DEBUG_FLAGS " -ggdb")
      string(APPEND CXX_DEBUG_FLAGS " -ggdb")
      string(APPEND C_RELWITHDEBINFO_FLAGS " -ggdb")
      string(APPEND CXX_RELWITHDEBINFO_FLAGS " -ggdb")
    endif()
  endif()
endif()

string(APPEND CMAKE_C_FLAGS_RELEASE "${C_RELEASE_FLAGS} ${ARROW_C_FLAGS_RELEASE}")
string(APPEND CMAKE_CXX_FLAGS_RELEASE "${CXX_RELEASE_FLAGS} ${ARROW_CXX_FLAGS_RELEASE}")
Expand Down Expand Up @@ -733,3 +759,40 @@ if(MSVC)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${MSVC_LINKER_FLAGS}")
endif()
endif()

# Compiler and linker flags that are specific to Emscripten builds.
if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
  # Compile flags are:
  # 1) We force *everything* to build as position independent
  # 2) And with support for C++ exceptions
  string(APPEND CMAKE_C_FLAGS " -fPIC -fexceptions")
  # deprecated-literal-operator error is thrown in datetime (vendored lib in arrow)
  string(APPEND CMAKE_CXX_FLAGS
         " -fPIC -fexceptions -Wno-error=deprecated-literal-operator")

  # Flags for creating shared libraries (only used in pyarrow, because
  # Emscripten builds libarrow as static).
  # Flags are:
  # 1) Tell it to use JavaScript / WebAssembly 64 bit number support.
  # 2) Tell it to build with support for C++ exceptions
  # 3) Skip linker flags error which happens with -soname parameter
  set(ARROW_EMSCRIPTEN_LINKER_FLAGS "-sWASM_BIGINT=1 -fexceptions -Wno-error=linkflags")
  set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS
      "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
  set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS
      "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
  set(CMAKE_SHARED_LINKER_FLAGS "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}")

  # Flags for linking executables always start with the common linker flags.
  set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS}")
  if(ARROW_TESTING)
    # Flags for building test executables for use in node.
    # The build type is compared case-insensitively so that "Release",
    # "release" and "RELEASE" are all recognized as a release build.
    string(TOUPPER "${CMAKE_BUILD_TYPE}" ARROW_EMSCRIPTEN_BUILD_TYPE_UPPER)
    if(NOT "${ARROW_EMSCRIPTEN_BUILD_TYPE_UPPER}" STREQUAL "RELEASE")
      # Only requested for non-release builds.
      # NOTE(review): presumably release links post-process the wasm file,
      # which this check would reject - confirm.
      string(APPEND CMAKE_EXE_LINKER_FLAGS " -sERROR_ON_WASM_CHANGES_AFTER_LINK=1")
    endif()
    unset(ARROW_EMSCRIPTEN_BUILD_TYPE_UPPER)
    # Allow memory growth, link the node file system libraries and run the
    # test init script before the program starts.
    string(APPEND
           CMAKE_EXE_LINKER_FLAGS
           " -sALLOW_MEMORY_GROWTH -lnodefs.js -lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js"
    )
  else()
    string(APPEND CMAKE_EXE_LINKER_FLAGS " -sALLOW_MEMORY_GROWTH")
  endif()
endif()
Loading

0 comments on commit 1a1d2c8

Please sign in to comment.