diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 2e0c9d4e1e..a2e8da964a 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -193,7 +193,7 @@ jobs: displayName: "Which python" - script: | - python -m pip install -U numpy "pyarrow>=0.16.0,<1" pip + python -m pip install -U numpy "pyarrow>=2" pip displayName: "Python deps" - script: npm install -g yarn @@ -202,7 +202,10 @@ jobs: - script: yarn displayName: 'Install Deps' - - script: yarn build_python --ci $(python_flag) + - script: choco install flatc + displayName: "Install Flatbuffers" + + - script: yarn build_python --ci $(python_flag) displayName: 'build' env: # Set `BOOST_ROOT` manually, as `BOOST_ROOT` is removed in the VM: @@ -256,7 +259,7 @@ jobs: versionSpec: '12.x' - script: | - brew install boost + brew install boost flatbuffers displayName: "System deps" - script: | @@ -266,8 +269,15 @@ jobs: displayName: "Which python" - script: | - python -m pip install -U delocate wheel numpy "pyarrow>=0.16.0,<1" pip - displayName: "Python deps" + python -m pip install -U delocate wheel numpy "pyarrow>=2" pip + displayName: "Python 3 deps" + condition: eq(variables['python_flag'], '') + + # Last released PyArrow version on Python 2 is 0.16.0 + - script: | + python -m pip install -U delocate wheel numpy "pyarrow==0.16.0" pip + displayName: "Python 2 deps" + condition: eq(variables['python_flag'], '--python2') - script: npm install -g yarn displayName: "Install Yarn" diff --git a/binder/requirements.txt b/binder/requirements.txt index ce4d169c93..54b02f8dee 100644 --- a/binder/requirements.txt +++ b/binder/requirements.txt @@ -1,5 +1,5 @@ ipywidgets==7.5.1 jupyterlab==2.2.8 pandas==0.25.3 -pyarrow==0.17.1 +pyarrow==2.0.0 voila==0.2.3 \ No newline at end of file diff --git a/cmake/arrow.txt.in b/cmake/arrow.txt.in index 208d4a1eb2..d8da63f81b 100644 --- a/cmake/arrow.txt.in +++ b/cmake/arrow.txt.in @@ -12,5 +12,5 @@ ExternalProject_Add(apachearrow BUILD_COMMAND "" INSTALL_COMMAND "" TEST_COMMAND "" - 
CMAKE_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}" + CMAKE_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DFLATBUFFERS_BUILD_TESTS=OFF" ) diff --git a/cmake/arrow/CMakeLists.txt b/cmake/arrow/CMakeLists.txt index 51c11ac7f9..221770ce32 100644 --- a/cmake/arrow/CMakeLists.txt +++ b/cmake/arrow/CMakeLists.txt @@ -101,6 +101,28 @@ set(ARROW_SRCS ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/options.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/writer.cc) +if (PSP_PYTHON_BUILD) + set(ARROW_SRCS + ${ARROW_SRCS} + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/datum.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/file.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/tensor/coo_converter.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/tensor/csf_converter.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/tensor/csx_converter.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/formatting.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/time.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/double-conversion/bignum-dtoa.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/double-conversion/fast-dtoa.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/double-conversion/fixed-dtoa.cc + ) + + if(WIN32) + set(ARROW_SRCS + ${ARROW_SRCS} + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/musl/strptime.c) + endif() +endif() + set_property(SOURCE util/io_util.cc APPEND_STRING PROPERTY COMPILE_FLAGS " -Wno-unused-macros -stdlib=libc++") @@ -130,13 +152,17 @@ set(FBS_SRC ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/feather.fbs) include_directories(src) + +# Build Arrow as a static library set(ARROW_BUILD_STATIC ON) if (PSP_WASM_BUILD) set(CMAKE_POSITION_INDEPENDENT_CODE OFF) elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD) set(CMAKE_POSITION_INDEPENDENT_CODE ON) endif() + add_library(arrow STATIC ${ARROW_SRCS}) + target_compile_definitions(arrow PUBLIC ARROW_NO_DEPRECATED_API) target_compile_definitions(arrow PUBLIC ARROW_STATIC) diff 
--git a/cmake/date.txt.in b/cmake/date.txt.in index 6e476bfbbc..6e39696135 100644 --- a/cmake/date.txt.in +++ b/cmake/date.txt.in @@ -5,7 +5,7 @@ project(date-download NONE) include(ExternalProject) ExternalProject_Add(date GIT_REPOSITORY https://github.com/HowardHinnant/date.git - GIT_TAG master + GIT_TAG v3.0.0 SOURCE_DIR "${CMAKE_BINARY_DIR}/date-src" BINARY_DIR "${CMAKE_BINARY_DIR}/date-build" CONFIGURE_COMMAND "" diff --git a/cmake/flatbuffers.txt.in b/cmake/flatbuffers.txt.in new file mode 100644 index 0000000000..754a9c9e6d --- /dev/null +++ b/cmake/flatbuffers.txt.in @@ -0,0 +1,16 @@ +cmake_minimum_required(VERSION 3.7.2) + +project(flatbuffers-download NONE) + +include(ExternalProject) +ExternalProject_Add(flatbuffers + GIT_REPOSITORY https://github.com/google/flatbuffers.git + GIT_TAG v1.12.0 + SOURCE_DIR "${CMAKE_BINARY_DIR}/flatbuffers-src" + BINARY_DIR "${CMAKE_BINARY_DIR}/flatbuffers-build" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" + CMAKE_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}" +) diff --git a/cmake/modules/FindFlatbuffers.cmake b/cmake/modules/FindFlatbuffers.cmake index 42cfe75438..3c77572686 100644 --- a/cmake/modules/FindFlatbuffers.cmake +++ b/cmake/modules/FindFlatbuffers.cmake @@ -22,26 +22,48 @@ # FLATBUFFERS_INCLUDE_DIR, directory containing headers # FLATBUFFERS_STATIC_LIB, path to flatbuffers's static library # FLATBUFFERS_COMPILER, path to flatc compiler +# +# TODO: [01-15-2021] now that we use Flatbuffers on all platforms, it might be +# a good time to figure out how we can install Flatbuffers as a dependency +# inside our CMakeLists (we would just need to build the flatc executable +# before our Arrow build starts). Right now, I've put in some hacks to make +# sure our Windows build works on Azure by pre-installing flatc (like we do on +# all other platforms), and then pulling down the headers for Windows so they +# can be included. 
# this might fail # https://gitlab.kitware.com/cmake/cmake/issues/19120 -find_path(FLATBUFFERS_INCLUDE_DIR flatbuffers/flatbuffers.h - PATHS ${FLATBUFFERS_ROOT}/include - HINTS /usr/local /usr/local/flatbuffers /usr/local/Homebrew /usr ~/homebrew/ /usr/local/include /usr/local/flatbuffers/include /usr/include ~/homebrew/include - NO_CMAKE_SYSTEM_PATH - NO_SYSTEM_ENVIRONMENT_PATH) +if (WIN32) + find_path(FLATBUFFERS_INCLUDE_DIR flatbuffers/flatbuffers.h + PATHS ${FLATBUFFERS_ROOT}/include) + + find_program(FLATBUFFERS_COMPILER flatc + PATHS ${FLATBUFFERS_ROOT}/bin) +else() + find_path(FLATBUFFERS_INCLUDE_DIR flatbuffers/flatbuffers.h + PATHS ${FLATBUFFERS_ROOT}/include + HINTS /usr/local /usr/local/flatbuffers /usr/local/Homebrew /usr ~/homebrew/ /usr/local/include /usr/local/flatbuffers/include /usr/include ~/homebrew/include + NO_CMAKE_SYSTEM_PATH + NO_SYSTEM_ENVIRONMENT_PATH) -find_program(FLATBUFFERS_COMPILER flatc - PATHS ${FLATBUFFERS_ROOT}/bin - HINTS /usr/local/bin /usr/bin /usr/local/Homebrew/bin ~/homebrew/bin - NO_CMAKE_SYSTEM_PATH - NO_SYSTEM_ENVIRONMENT_PATH) + find_program(FLATBUFFERS_COMPILER flatc + PATHS ${FLATBUFFERS_ROOT}/bin + HINTS /usr/local/bin /usr/bin /usr/local/Homebrew/bin ~/homebrew/bin + NO_CMAKE_SYSTEM_PATH + NO_SYSTEM_ENVIRONMENT_PATH) -if(NOT ${FLATBUFFERS_INCLUDE_DIR}) - # HACK - set(FLATBUFFERS_INCLUDE_DIR /usr/local/include) + if(NOT FLATBUFFERS_INCLUDE_DIR) + # HACK: find_path() failed, so fall back to the conventional prefix instead of leaving a -NOTFOUND value + set(FLATBUFFERS_INCLUDE_DIR /usr/local/include) + endif() endif() include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(FLATBUFFERS REQUIRED_VARS - FLATBUFFERS_INCLUDE_DIR FLATBUFFERS_COMPILER) \ No newline at end of file + +if (WIN32) + find_package_handle_standard_args(Flatbuffers REQUIRED_VARS + FLATBUFFERS_INCLUDE_DIR FLATBUFFERS_COMPILER) +else() + find_package_handle_standard_args(FLATBUFFERS REQUIRED_VARS + FLATBUFFERS_INCLUDE_DIR FLATBUFFERS_COMPILER) +endif() \ No newline at end of file diff --git a/cmake/modules/FindPyArrow.cmake 
b/cmake/modules/FindPyArrow.cmake deleted file mode 100644 index c832d926da..0000000000 --- a/cmake/modules/FindPyArrow.cmake +++ /dev/null @@ -1,82 +0,0 @@ -# Find the Python PyArrow package -# PYTHON_PYARROW_INCLUDE_DIR -# PYTHON_PYARROW_FOUND -# PYTHON_PYARROW_LIBRARY_DIR -# PYTHON_PYARROW_LIBRARIES -# will be set by this script -cmake_minimum_required(VERSION 2.6) - -# Find out the include path -execute_process( - COMMAND "${Python_EXECUTABLE}" -c - "from __future__ import print_function\ntry: import pyarrow; print(pyarrow.get_include(), end='')\nexcept:pass" - OUTPUT_VARIABLE __pyarrow_path) - -# And the lib dirs -execute_process( - COMMAND "${Python_EXECUTABLE}" -c - "from __future__ import print_function\ntry: import pyarrow; print(pyarrow.get_library_dirs()[0], end='')\nexcept:pass" - OUTPUT_VARIABLE __pyarrow_library_dirs) - -# And the lib dirs -execute_process( - COMMAND "${Python_EXECUTABLE}" -c - "from __future__ import print_function\ntry: import pyarrow; print(' '.join(pyarrow.get_libraries()), end='')\nexcept:pass" - OUTPUT_VARIABLE __pyarrow_libraries) - - # And the version -execute_process( - COMMAND "${Python_EXECUTABLE}" -c - "from __future__ import print_function\ntry: import pyarrow; print(pyarrow.__version__, end='')\nexcept:pass" - OUTPUT_VARIABLE __pyarrow_version) - - find_path(PYTHON_PYARROW_INCLUDE_DIR arrow/python/api.h - HINTS "${__pyarrow_path}" "${PYTHON_INCLUDE_PATH}" NO_DEFAULT_PATH) - -set(PYTHON_PYARROW_LIBRARY_DIR ${__pyarrow_library_dirs}) - -# Figure out the major version for the .so/.dylibs -string(REPLACE "." 
";" PYARROW_VERSION_LIST ${__pyarrow_version}) -list(GET PYARROW_VERSION_LIST 0 PYARROW_VERSION_MAJOR) -list(GET PYARROW_VERSION_LIST 1 PYARROW_VERSION_MINOR) -list(GET PYARROW_VERSION_LIST 2 PYARROW_VERSION_PATCH) - -if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") - # windows its just "arrow.dll" - set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY "arrow_python") - set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY "arrow") -elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin" AND ${PYARROW_VERSION_MAJOR} EQUAL "0") - # Link against pre-built libarrow on MacOS - set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python.${PYARROW_VERSION_MINOR}.dylib) - set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow.${PYARROW_VERSION_MINOR}.dylib) -elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin") - # Link against pre-built libarrow on MacOS - set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python.${PYARROW_VERSION_MAJOR}00.dylib) - set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow.${PYARROW_VERSION_MAJOR}00.dylib) -elseif (${PYARROW_VERSION_MAJOR} EQUAL "0") - set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR}) - set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR}) -else() - # linux - set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MAJOR}00) - set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MAJOR}00) -endif() - -set(PYTHON_PYARROW_LIBRARIES 
${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY}) - -if(PYTHON_PYARROW_INCLUDE_DIR AND PYTHON_PYARROW_LIBRARIES) - set(PYTHON_PYARROW_FOUND 1 CACHE INTERNAL "Python pyarrow found") -endif() - - -# set(PYTHON_PYARROW_LIBRARIES ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY}) -# else() -# # linux -# set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR}) -# set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${CMAKE_SHARED_LIBRARY_PREFIX}arrow${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR}) - - - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(PyArrow REQUIRED_VARS PYTHON_PYARROW_INCLUDE_DIR PYTHON_PYARROW_LIBRARIES PYTHON_PYARROW_LIBRARY_DIR - VERSION_VAR __pyarrow_version) diff --git a/cmake/modules/FindPybind.cmake b/cmake/modules/FindPybind.cmake index cb31165cd0..e67e74a3d5 100644 --- a/cmake/modules/FindPybind.cmake +++ b/cmake/modules/FindPybind.cmake @@ -1,4 +1,4 @@ -# Find the Python PyArrow package +# Find the Python PyBind package # PYTHON_PYBIND_INCLUDE_DIR # PYTHON_PYBIND_FOUND # will be set by this script diff --git a/cpp/perspective/CMakeLists.txt b/cpp/perspective/CMakeLists.txt index d89f6e1761..e650ae902d 100644 --- a/cpp/perspective/CMakeLists.txt +++ b/cpp/perspective/CMakeLists.txt @@ -42,9 +42,9 @@ function (psp_build_message message) set(BUILD_MESSAGE "${BUILD_MESSAGE}\n${message}") endfunction() -############################### -# Helper to grab dependencies # -############################### +################################################### +# Helper to grab dependencies from remote sources # +################################################### function (psp_build_dep name cmake_file) if(EXISTS ${CMAKE_BINARY_DIR}/${name}-build) psp_build_message("${Cyan}Dependency found - not rebuilding - ${CMAKE_BINARY_DIR}/${name}-build${ColorReset}") @@ -69,7 +69,7 @@ function 
(psp_build_dep name cmake_file) endif() if(${name} STREQUAL arrow) - # Overwrite arrow's CMakeLists with our custom, minimal one + # Overwrite arrow's CMakeLists with our custom, minimal CMakeLists. configure_file(${PSP_CMAKE_MODULE_PATH}/arrow/CMakeLists.txt ${CMAKE_BINARY_DIR}/arrow-src/cpp/ COPYONLY) configure_file(${PSP_CMAKE_MODULE_PATH}/arrow/config.h ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/ COPYONLY) add_subdirectory(${CMAKE_BINARY_DIR}/arrow-src/cpp/ @@ -222,9 +222,6 @@ if (PSP_WASM_BUILD) # Include this docker-only directory. include_directories("/boost_includes") - # Build Rapidjson as it is used in the minimal arrow to be built later. - psp_build_dep("rapidjson" "${PSP_CMAKE_MODULE_PATH}/rapidjson.txt.in") - set(EXTENDED_FLAGS " \ --bind \ --source-map-base ./build/ \ @@ -315,7 +312,9 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD) endif() endif() + # Boost is a system dependency and must be present and built on the system. find_package(Boost REQUIRED) + if(NOT Boost_FOUND) message(FATAL_ERROR "${Red}Boost could not be located${ColorReset}") else() @@ -323,7 +322,10 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD) include_directories( ${Boost_INCLUDE_DIRS} ) endif() + # TBB is a dependency that we can build as part of our build process, so it + # does not necessarily have to be present before Perspective is built. 
find_package(TBB) + if(NOT TBB_FOUND) psp_build_message("${Red}TBB could not be located - building TBB from external source ${ColorReset}") psp_build_dep("tbb" "${PSP_CMAKE_MODULE_PATH}/TBB.txt.in") @@ -415,17 +417,6 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD) psp_build_message("${Cyan}Numpy found: ${PYTHON_NUMPY_INCLUDE_DIR}${ColorReset}") include_directories( ${PYTHON_NUMPY_INCLUDE_DIR}) endif() - - find_package(PyArrow REQUIRED) - - if(NOT PYTHON_PYARROW_FOUND) - message(FATAL_ERROR "${Red}PyArrow could not be located${ColorReset}") - else() - psp_build_message("${Cyan}PyArrow found: PYTHON_PYARROW_INCLUDE_DIR - ${PYTHON_PYARROW_INCLUDE_DIR}${ColorReset}") - psp_build_message("${Cyan}Using pre-built ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY} from: ${PYTHON_PYARROW_LIBRARY_DIR}${ColorReset}") - include_directories(${PYTHON_PYARROW_INCLUDE_DIR}) - link_directories(${PYTHON_PYARROW_LIBRARY_DIR}) - endif() ##################### endif() endif() @@ -435,21 +426,29 @@ psp_build_dep("date" "${PSP_CMAKE_MODULE_PATH}/date.txt.in") psp_build_dep("hopscotch" "${PSP_CMAKE_MODULE_PATH}/hopscotch.txt.in") psp_build_dep("ordered-map" "${PSP_CMAKE_MODULE_PATH}/ordered-map.txt.in") -# For WASM/CPP build, build minimal arrow from source -if (NOT PSP_PYTHON_BUILD) - # build arrow + dependencies from source for Emscripten and C++ - message("${Cyan}Building minimal Apache Arrow${ColorReset}") +# Build minimal arrow for both Emscripten and Python +message("${Cyan}Building minimal Apache Arrow${ColorReset}") - psp_build_dep("double-conversion" "${PSP_CMAKE_MODULE_PATH}/double-conversion.txt.in") - psp_build_dep("arrow" "${PSP_CMAKE_MODULE_PATH}/arrow.txt.in") +# Build arrow dependencies +psp_build_dep("rapidjson" "${PSP_CMAKE_MODULE_PATH}/rapidjson.txt.in") +psp_build_dep("double-conversion" "${PSP_CMAKE_MODULE_PATH}/double-conversion.txt.in") - find_package(Flatbuffers) - if(NOT FLATBUFFERS_FOUND) - message(FATAL_ERROR"${Red}Flatbuffers could not 
be located${ColorReset}") - else() - psp_build_message("${Cyan}Found Flatbuffers in ${FLATBUFFERS_INCLUDE_DIR}${ColorReset}") - include_directories( ${FLATBUFFERS_INCLUDE_DIR} ) - endif() +# FIXME: this is a hack to get Flatbuffers working on Azure Win64 by making the +# headers accessible. The actual flatc executable is installed using +# Chocolatey for our Azure Windows job. +if (WIN32) + psp_build_dep("flatbuffers" "${PSP_CMAKE_MODULE_PATH}/flatbuffers.txt.in") +endif() + +# Build minimal arrow itself +psp_build_dep("arrow" "${PSP_CMAKE_MODULE_PATH}/arrow.txt.in") + +find_package(Flatbuffers) +if(NOT FLATBUFFERS_FOUND) + message(FATAL_ERROR "${Red}Flatbuffers could not be located${ColorReset}") +else() + psp_build_message("${Cyan}Found Flatbuffers in ${FLATBUFFERS_INCLUDE_DIR}${ColorReset}") + include_directories( ${FLATBUFFERS_INCLUDE_DIR} ) endif() ##################### @@ -593,14 +592,14 @@ if (PSP_WASM_BUILD) add_executable(perspective.async src/cpp/emscripten.cpp) target_link_libraries(perspective.async psp "${ASYNC_MODE_FLAGS}") target_compile_definitions(perspective.async PRIVATE PSP_ENABLE_WASM=1) + set_target_properties(perspective.async PROPERTIES COMPILE_FLAGS "${ASYNC_MODE_FLAGS}") set_target_properties(perspective.async PROPERTIES RUNTIME_OUTPUT_DIRECTORY "./build/") set_target_properties(perspective.async PROPERTIES OUTPUT_NAME "psp.async") elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD) if(NOT WIN32) set(CMAKE_SHARED_LIBRARY_SUFFIX .so) - # Look for the binary using @loader_path (relative to binary location) instead of @rpath - # and include arrow in @rpath so it can be found by libbinding/libpsp + # Look for the binary using @loader_path (relative to binary location) set(CMAKE_MACOSX_RPATH TRUE) set(CMAKE_SKIP_BUILD_RPATH FALSE) set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) @@ -635,25 +634,19 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD) # .dll not importable set_property(TARGET binding PROPERTY SUFFIX .pyd) - elseif (MACOS OR NOT MANYLINUX) - 
target_compile_options(binding PRIVATE -Wdeprecated-declarations) - - # Add a relative path to search for PyArrow - when Perspective is - # installed from a wheel, PyArrow may not be in the same directory - # as the PyArrow which was used to build the wheel. - # - # Assuming that both Pyarrow and Perspective are installed in - # `site-packages`, the relative search path should be able to pick - # up pyarrow. This is only enabled for *nix (non-manylinux), as `auditwheel` - # will not delocate libarrow properly if it is in the rpath. - set_property(TARGET psp PROPERTY INSTALL_RPATH ${CMAKE_INSTALL_RPATH} ${module_origin_path} ${module_origin_path}../../pyarrow/ ${PYTHON_PYARROW_LIBRARY_DIR} ${PSP_PYTHON_ARROWINSTALLDIR}) - set_property(TARGET binding PROPERTY INSTALL_RPATH ${CMAKE_INSTALL_RPATH} ${module_origin_path} ${module_origin_path}../../pyarrow/ ${PYTHON_PYARROW_LIBRARY_DIR} ${PSP_PYTHON_ARROWINSTALLDIR}) else() target_compile_options(binding PRIVATE -Wdeprecated-declarations) endif() - target_link_libraries(psp ${PYTHON_PYARROW_LIBRARIES}) - target_link_libraries(binding ${PYTHON_PYARROW_LIBRARIES}) + # python 2.7 build on manylinux needs -lrt for access to time functions, + # otherwise we get an "undefined symbol: clock_gettime" error when + # building minimal arrow. 
+ if (PSP_PYTHON_BUILD AND MANYLINUX AND PSP_PYTHON_VERSION STREQUAL "2.7") + target_link_libraries(psp rt) + endif() + + # Link against minimal arrow static library + target_link_libraries(psp arrow) target_link_libraries(psp tbb) target_link_libraries(binding tbb) @@ -668,10 +661,6 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD) add_custom_command(TARGET binding POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:binding> ${PSP_PYTHON_SRC}/table/) if(WIN32) - # inline arrow dlls - file(GLOB ARROW_DLLS "${PYTHON_PYARROW_LIBRARY_DIR}/*.dll") - - add_custom_command(TARGET binding POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${ARROW_DLLS} ${PSP_PYTHON_SRC}/table/) if(NOT TBB_FOUND) add_custom_command(TARGET binding POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:tbb> ${PSP_PYTHON_SRC}/table/) endif() @@ -679,8 +668,6 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD) ######################## else() add_library(psp SHARED ${WASM_SOURCE_FILES}) - - # Link perspective against custom-built minimal arrow target_link_libraries(psp arrow) endif() diff --git a/docker/python/manylinux2010/Dockerfile b/docker/python/manylinux2010/Dockerfile index d82b996479..7c956aabde 100644 --- a/docker/python/manylinux2010/Dockerfile +++ b/docker/python/manylinux2010/Dockerfile @@ -53,18 +53,10 @@ RUN tar xfz boost_1_71_0.tar.gz RUN cd boost_1_71_0 && ./bootstrap.sh RUN cd boost_1_71_0 && ./b2 -j8 --with-program_options --with-filesystem --with-system install -RUN python2.7 -m pip install 'numpy>=1.13.1' 'pandas>=0.22.0' git+https://chromium.googlesource.com/external/gyp -RUN python3.6 -m pip install 'numpy>=1.13.1' 'pandas>=0.22.0' -RUN python3.7 -m pip install 'numpy>=1.13.1' 'pandas>=0.22.0' -RUN python3.8 -m pip install 'numpy>=1.13.1' 'pandas>=0.22.0' - -# install pyarrow -RUN wget https://github.com/apache/arrow/archive/apache-arrow-0.16.0.tar.gz >/dev/null 2>&1 || echo "wget arrow failed" -RUN tar xfz apache-arrow-0.16.0.tar.gz -RUN cd arrow-apache-arrow-0.16.0 && mkdir build && cd build && cmake ../cpp/ 
-DPython_ADDITIONAL_VERSIONS=2.7 -DARROW_RPATH_ORIGIN=ON -DARROW_PYTHON=ON -DARROW_FLIGHT=OFF -DARROW_IPC=ON -DARROW_COMPUTE=ON -DCMAKE_INSTALL_PREFIX=/usr -DARROW_DATASET=OFF -DARROW_BUILD_UTILITIES=OFF -DARROW_JEMALLOC=OFF -DARROW_DEPENDENCY_SOURCE=BUNDLED -DARROW_HDFS=OFF -DARROW_WITH_BACKTRACE=OFF -DARROW_WITH_BROTLI=OFF -DARROW_WITH_BZ2=OFF -DARROW_WITH_LZ4=OFF -DARROW_WITH_SNAPPY=OFF -DARROW_WITH_ZLIB=OFF -DARROW_WITH_ZSTD=OFF && make -j2 && sudo make install && cd ../python && PYARROW_BUNDLE_ARROW_CPP=1 sudo python2.7 setup.py install --single-version-externally-managed --record RECORD -RUN cd arrow-apache-arrow-0.16.0 && rm -rf build && mkdir build && cd build && cmake ../cpp/ -DPython_ADDITIONAL_VERSIONS=3.6 -DARROW_RPATH_ORIGIN=ON -DARROW_PYTHON=ON -DARROW_FLIGHT=OFF -DARROW_IPC=ON -DARROW_COMPUTE=ON -DCMAKE_INSTALL_PREFIX=/usr -DARROW_DATASET=OFF -DARROW_BUILD_UTILITIES=OFF -DARROW_JEMALLOC=OFF -DARROW_DEPENDENCY_SOURCE=BUNDLED -DARROW_HDFS=OFF -DARROW_WITH_BACKTRACE=OFF -DARROW_WITH_BROTLI=OFF -DARROW_WITH_BZ2=OFF -DARROW_WITH_LZ4=OFF -DARROW_WITH_SNAPPY=OFF -DARROW_WITH_ZLIB=OFF -DARROW_WITH_ZSTD=OFF && make -j2 && sudo make install && cd ../python && PYARROW_BUNDLE_ARROW_CPP=1 sudo python3.6 setup.py install --single-version-externally-managed --record RECORD -RUN cd arrow-apache-arrow-0.16.0 && rm -rf build && mkdir build && cd build && cmake ../cpp/ -DPython_ADDITIONAL_VERSIONS=3.7 -DARROW_RPATH_ORIGIN=ON -DARROW_PYTHON=ON -DARROW_FLIGHT=OFF -DARROW_IPC=ON -DARROW_COMPUTE=ON -DCMAKE_INSTALL_PREFIX=/usr -DARROW_DATASET=OFF -DARROW_BUILD_UTILITIES=OFF -DARROW_JEMALLOC=OFF -DARROW_DEPENDENCY_SOURCE=BUNDLED -DARROW_HDFS=OFF -DARROW_WITH_BACKTRACE=OFF -DARROW_WITH_BROTLI=OFF -DARROW_WITH_BZ2=OFF -DARROW_WITH_LZ4=OFF -DARROW_WITH_SNAPPY=OFF -DARROW_WITH_ZLIB=OFF -DARROW_WITH_ZSTD=OFF && make -j2 && sudo make install && cd ../python && PYARROW_BUNDLE_ARROW_CPP=1 sudo python3.7 setup.py install --single-version-externally-managed --record RECORD -RUN cd 
arrow-apache-arrow-0.16.0 && rm -rf build && mkdir build && cd build && cmake ../cpp/ -DPython_ADDITIONAL_VERSIONS=3.8 -DARROW_RPATH_ORIGIN=ON -DARROW_PYTHON=ON -DARROW_FLIGHT=OFF -DARROW_IPC=ON -DARROW_COMPUTE=ON -DCMAKE_INSTALL_PREFIX=/usr -DARROW_DATASET=OFF -DARROW_BUILD_UTILITIES=OFF -DARROW_JEMALLOC=OFF -DARROW_DEPENDENCY_SOURCE=BUNDLED -DARROW_HDFS=OFF -DARROW_WITH_BACKTRACE=OFF -DARROW_WITH_BROTLI=OFF -DARROW_WITH_BZ2=OFF -DARROW_WITH_LZ4=OFF -DARROW_WITH_SNAPPY=OFF -DARROW_WITH_ZLIB=OFF -DARROW_WITH_ZSTD=OFF && make -j2 && sudo make install && cd ../python && PYARROW_BUNDLE_ARROW_CPP=1 sudo python3.8 setup.py install --single-version-externally-managed --record RECORD +RUN python2.7 -m pip install 'numpy>=1.13.1' 'pandas>=0.22.0' 'pyarrow>=0.16.0' git+https://chromium.googlesource.com/external/gyp +RUN python3.6 -m pip install 'numpy>=1.13.1' 'pandas>=0.22.0' 'pyarrow>=2.0.0' +RUN python3.7 -m pip install 'numpy>=1.13.1' 'pandas>=0.22.0' 'pyarrow>=2.0.0' +RUN python3.8 -m pip install 'numpy>=1.13.1' 'pandas>=0.22.0' 'pyarrow>=2.0.0' # install node RUN curl -sL https://rpm.nodesource.com/setup_10.x | sudo bash - diff --git a/docker/python/manylinux2014/Dockerfile b/docker/python/manylinux2014/Dockerfile index 183145381e..7ec5b1f843 100644 --- a/docker/python/manylinux2014/Dockerfile +++ b/docker/python/manylinux2014/Dockerfile @@ -50,16 +50,9 @@ RUN tar xfz boost_1_71_0.tar.gz RUN cd boost_1_71_0 && ./bootstrap.sh RUN cd boost_1_71_0 && ./b2 -j8 --with-program_options --with-filesystem --with-system install -RUN python3.6 -m pip install 'numpy>=1.13.1' 'pandas>=0.22.0' -RUN python3.7 -m pip install 'numpy>=1.13.1' 'pandas>=0.22.0' -RUN python3.8 -m pip install 'numpy>=1.13.1' 'pandas>=0.22.0' - -# install pyarrow -RUN wget https://github.com/apache/arrow/archive/apache-arrow-0.16.0.tar.gz >/dev/null 2>&1 || echo "wget arrow failed" -RUN tar xfz apache-arrow-0.16.0.tar.gz -RUN cd arrow-apache-arrow-0.16.0 && mkdir build && cd build && cmake ../cpp/ 
-DPython_ADDITIONAL_VERSIONS=3.7 -DARROW_RPATH_ORIGIN=ON -DARROW_PYTHON=ON -DARROW_FLIGHT=OFF -DARROW_IPC=ON -DARROW_COMPUTE=ON -DCMAKE_INSTALL_PREFIX=/usr -DARROW_DATASET=OFF -DARROW_BUILD_UTILITIES=OFF -DARROW_JEMALLOC=OFF -DARROW_DEPENDENCY_SOURCE=BUNDLED -DARROW_HDFS=OFF -DARROW_WITH_BACKTRACE=OFF -DARROW_WITH_BROTLI=OFF -DARROW_WITH_BZ2=OFF -DARROW_WITH_LZ4=OFF -DARROW_WITH_SNAPPY=OFF -DARROW_WITH_ZLIB=OFF -DARROW_WITH_ZSTD=OFF && make -j2 && sudo make install && cd ../python && PYARROW_BUNDLE_ARROW_CPP=1 sudo python3.7 setup.py install --single-version-externally-managed --record RECORD -RUN cd arrow-apache-arrow-0.16.0 && rm -rf build && mkdir build && cd build && cmake ../cpp/ -DPython_ADDITIONAL_VERSIONS=3.6 -DARROW_RPATH_ORIGIN=ON -DARROW_PYTHON=ON -DARROW_FLIGHT=OFF -DARROW_IPC=ON -DARROW_COMPUTE=ON -DCMAKE_INSTALL_PREFIX=/usr -DARROW_DATASET=OFF -DARROW_BUILD_UTILITIES=OFF -DARROW_JEMALLOC=OFF -DARROW_DEPENDENCY_SOURCE=BUNDLED -DARROW_HDFS=OFF -DARROW_WITH_BACKTRACE=OFF -DARROW_WITH_BROTLI=OFF -DARROW_WITH_BZ2=OFF -DARROW_WITH_LZ4=OFF -DARROW_WITH_SNAPPY=OFF -DARROW_WITH_ZLIB=OFF -DARROW_WITH_ZSTD=OFF && make -j2 && sudo make install && cd ../python && PYARROW_BUNDLE_ARROW_CPP=1 sudo python3.6 setup.py install --single-version-externally-managed --record RECORD -RUN cd arrow-apache-arrow-0.16.0 && rm -rf build && mkdir build && cd build && cmake ../cpp/ -DPython_ADDITIONAL_VERSIONS=3.8 -DARROW_RPATH_ORIGIN=ON -DARROW_PYTHON=ON -DARROW_FLIGHT=OFF -DARROW_IPC=ON -DARROW_COMPUTE=ON -DCMAKE_INSTALL_PREFIX=/usr -DARROW_DATASET=OFF -DARROW_BUILD_UTILITIES=OFF -DARROW_JEMALLOC=OFF -DARROW_DEPENDENCY_SOURCE=BUNDLED -DARROW_HDFS=OFF -DARROW_WITH_BACKTRACE=OFF -DARROW_WITH_BROTLI=OFF -DARROW_WITH_BZ2=OFF -DARROW_WITH_LZ4=OFF -DARROW_WITH_SNAPPY=OFF -DARROW_WITH_ZLIB=OFF -DARROW_WITH_ZSTD=OFF && make -j2 && sudo make install && cd ../python && PYARROW_BUNDLE_ARROW_CPP=1 sudo python3.8 setup.py install --single-version-externally-managed --record RECORD +RUN 
python3.6 -m pip install 'numpy>=1.13.1' 'pandas>=0.22.0' 'pyarrow>=2.0.0' +RUN python3.7 -m pip install 'numpy>=1.13.1' 'pandas>=0.22.0' 'pyarrow>=2.0.0' +RUN python3.8 -m pip install 'numpy>=1.13.1' 'pandas>=0.22.0' 'pyarrow>=2.0.0' # install node RUN curl -sL https://rpm.nodesource.com/setup_10.x | sudo bash - diff --git a/docs/md/development.md b/docs/md/development.md index ba9bdc5af9..1e1fac7c57 100644 --- a/docs/md/development.md +++ b/docs/md/development.md @@ -47,29 +47,32 @@ yarn start simple #### Docker [Docker](https://docs.docker.com/install/) images with pre-built development -environments are provided for the Javascript and Python libraries. +environments are provided for the Python libraries. To build Perspective using Docker, select the option in `yarn setup`. +### System Dependencies + +`Perspective.js` and `perspective-python` **require** the following system dependencies to be installed: + +- [CMake](https://cmake.org/) (version 3.15.4 or higher) +- [Boost](https://www.boost.org/) (version 1.67 or higher, must be built - not header-only) +- [Flatbuffers](https://google.github.io/flatbuffers/flatbuffers_guide_building.html) + ## `Perspective.js` To build the JavaScript library, which includes WebAssembly compilation, [Emscripten](https://github.com/kripken/emscripten) and its prerequisites are -required. A Docker image is provided with the correct environment and -prerequisites. - -#### System Dependencies +required. -Perspective requires some system dependencies to be installed before it can be -built using Emscripten: - -- [CMake](https://cmake.org/) (version 3.15.4 or higher) -- [Boost](https://www.boost.org/) (version 1.67 or higher) -- [Flatbuffers](https://google.github.io/flatbuffers/flatbuffers_guide_building.html) +`Perspective.js` specifies its Emscripten version dependency in `package.json`, +and the correct version of Emscripten will be installed with other JS +dependencies by running `yarn`. 
#### Building via local EMSDK -To build using local Emscripten, +To build using an Emscripten install on your local system and not the +Emscripten bundled with Perspective in its `package.json`, [install](https://emscripten.org/docs/getting_started/downloads.html) the Emscripten SDK, then activate and export the latest `emsdk` environment via [`emsdk_env.sh`](https://github.com/juj/emsdk): @@ -78,14 +81,14 @@ Emscripten SDK, then activate and export the latest `emsdk` environment via source emsdk/emsdk_env.sh ``` -We currently use Emscripten version `1.39.13` — deviating from this specific +We currently use Emscripten version `2.0.6` — deviating from this specific version of Emscripten can introduce various errors that are extremely difficult to debug. To install this specific version of Emscripten: ```bash -./emsdk install 1.39.13 +./emsdk install 2.0.6 ``` ## `perspective-python` @@ -104,14 +107,6 @@ To build the Python 2 version of the library, use the `--python2` flag: yarn build --python2 ``` -### System Dependencies - -`perspective-python` requires the following system dependencies to be installed before it can be -built from source: - -- [CMake](https://cmake.org/) (version 3.15.4 or higher) -- [Boost](https://www.boost.org/) (version 1.67 or higher) - ## System-Specific Instructions ### MacOS/OSX @@ -175,7 +170,7 @@ You can run the test suite simply with the standard NPM command, which will both build the test suite for every package and run them. ```bash -yarn test +yarn test [--debug] ``` A test name regex can be passed to `jest` via the same `-t` flag: @@ -243,10 +238,7 @@ Verbosity in the tests can be enabled with the `--verbose` flag. ### Troubleshooting installation from source If you are installing from a source distribution (sdist), make sure you have -CMake and Boost headers present on your machine: - -- CMake (version 3.15.4 or higher) -- Boost Headers (version 1.67) +the [System Dependencies](#system-dependencies) installed. 
Try installing in verbose mode: @@ -258,13 +250,7 @@ The most common culprits are: - CMake version too old - Boost headers are missing or too old -- PyArrow not installed prior to installing perspective - -Additionally, due to PEP-518 and build isolation, its possible that the version -of PyArrow that pip uses to build perspective-python is different from the one -you have installed. To disable this, pass the `--no-build-isolation` flag to -pip. - +- Flatbuffers not installed prior to installing Perspective #### Timezones in Python Tests Python tests are configured to use the `UTC` time zone. If running tests locally, diff --git a/docs/md/python.md b/docs/md/python.md index febd68994b..ef8df17a91 100644 --- a/docs/md/python.md +++ b/docs/md/python.md @@ -63,16 +63,6 @@ NumPy record arrays are all supported in `perspective-python`. pip install perspective-python ``` -#### Wheels PyArrow linkage - -Because we compile Apache Arrow from source to webassembly via Emscripten, we have a tight coupling on the specific version of Apache Arrow that must be used. As such, we link against a specific Apache Arrow version which must be present. Currently, our wheels build against PyArrow==0.17.1 for Python 3.* and PyArrow==0.16.0 for Python 2.7. - -To ignore compiled wheels and install from source with pip, install via - -```bash -pip install --no-binary perspective-python -``` - ### Jupyterlab `PerspectiveWidget` is a JupyterLab widget that implements the same API as diff --git a/python/perspective/pyproject.toml b/python/perspective/pyproject.toml index c3cb3c9c5a..d312a37713 100644 --- a/python/perspective/pyproject.toml +++ b/python/perspective/pyproject.toml @@ -1,3 +1,3 @@ [build-system] # Minimum requirements for the build system to execute. 
-requires = ["setuptools", "wheel", "backports.shutil_which ; python_version <'3'", "numpy>=1.13.1", "pyarrow>=0.16.0,<1"] +requires = ["setuptools", "wheel", "backports.shutil_which ; python_version <'3'", "numpy>=1.13.1"] diff --git a/python/perspective/setup.py b/python/perspective/setup.py index fe8de5c1c9..d1e04dc4ff 100644 --- a/python/perspective/setup.py +++ b/python/perspective/setup.py @@ -10,7 +10,6 @@ from setuptools.command.build_ext import build_ext from setuptools.command.sdist import sdist from distutils.version import LooseVersion -from distutils import sysconfig from codecs import open import io @@ -49,7 +48,6 @@ def which(x): "future>=0.16.0", "numpy>=1.13.1", "pandas>=0.22.0", - "pyarrow>=0.16.0,<1", "python-dateutil>=2.8.0", "six>=1.11.0", "tornado>=4.5.3", @@ -69,6 +67,7 @@ def which(x): "flake8>=3.7.8", "mock", "pybind11>=2.4.0", + "pyarrow>=0.16.0", "pytest>=4.3.0", "pytest-cov>=2.6.1", "pytest-check-links", @@ -146,14 +145,6 @@ def build_extension_cmake(self, ext): "-DPSP_PYTHON_VERSION={}".format(PYTHON_VERSION), "-DPython_ADDITIONAL_VERSIONS={}".format(PYTHON_VERSION), "-DPython_FIND_VERSION={}".format(PYTHON_VERSION), - "-DPSP_PYTHON_ARROWINSTALLDIR={}".format( - os.environ.get( - "PSP_PYTHON_ARROWINSTALLDIR", - os.path.join(sysconfig.get_python_lib(), "pyarrow").replace( - "\\", "/" - ), - ) - ), "-DPython_EXECUTABLE={}".format(sys.executable).replace("\\", "/"), "-DPython_ROOT_DIR={}".format(sys.prefix).replace("\\", "/"), "-DPython_ROOT={}".format(sys.prefix).replace("\\", "/"), diff --git a/scripts/_wheel_python.js b/scripts/_wheel_python.js index 795b11c403..b6d9616a09 100644 --- a/scripts/_wheel_python.js +++ b/scripts/_wheel_python.js @@ -61,12 +61,12 @@ try { // Create a wheel if (MANYLINUX_VERSION) { // install deps - - // These are system deps that may only be in place from pep-517/518 so lets - // reinstall them to be sure - cmd += `${PYTHON} -m pip install 'numpy>=1.13.1' 'pyarrow>=0.16.0,<1' && `; - // remove the build 
folder so we completely rebuild (and pick up the + // These are system deps that may only be in place from pep-517/518 so + // let's reinstall them to be sure + cmd += `${PYTHON} -m pip install 'numpy>=1.13.1' && `; + + // remove the build folder so we completely rebuild (and pick up the // libs we just installed above, since this build method won't use // pep-517/518) cmd += `rm -rf build/ &&`; @@ -83,8 +83,8 @@ try { } cmd += `&& ${PYTHON_INTERPRETER} -m auditwheel -v show ./dist/*.whl && ${PYTHON_INTERPRETER} -m auditwheel -v repair -L .lib ./dist/*.whl`; } else if (IS_MACOS) { - // Don't need to do any cleaning here since we will reuse the cmake cache - // and pyarrow/numpy paths from the pep-517/518 build in build_python.js + // Don't need to do any cleaning here since we will reuse the cmake + // cache and numpy paths from the pep-517/518 build in build_python.js cmd += `${PYTHON} setup.py bdist_wheel`; cmd += " && mkdir -p ./wheelhouse && cp -v ./dist/*.whl ./wheelhouse"; } else {