From 459c78b0656258ca556d801ee04a0b6f9f15ec77 Mon Sep 17 00:00:00 2001 From: Andrew Stein Date: Sun, 6 Sep 2020 01:26:28 -0400 Subject: [PATCH] Upgrade Arrow to 1.0.1 --- cmake/arrow.txt.in | 2 +- cmake/arrow/CMakeLists.txt | 19 ++++++++++--- cmake/arrow/config.h | 6 ++--- cmake/modules/FindPyArrow.cmake | 20 +++----------- cpp/perspective/CMakeLists.txt | 28 +++++++++++++------ cpp/perspective/src/cpp/arrow_loader.cpp | 34 +++++++++++++----------- cpp/perspective/src/cpp/arrow_writer.cpp | 17 +++++++++--- cpp/perspective/src/cpp/view.cpp | 10 +++---- python/perspective/pyproject.toml | 2 +- python/perspective/setup.py | 2 +- 10 files changed, 81 insertions(+), 59 deletions(-) diff --git a/cmake/arrow.txt.in b/cmake/arrow.txt.in index a8ec274da3..208d4a1eb2 100644 --- a/cmake/arrow.txt.in +++ b/cmake/arrow.txt.in @@ -5,7 +5,7 @@ project(arrow-download NONE) include(ExternalProject) ExternalProject_Add(apachearrow GIT_REPOSITORY https://github.com/apache/arrow.git - GIT_TAG apache-arrow-0.16.0 + GIT_TAG apache-arrow-1.0.1 SOURCE_DIR "${CMAKE_BINARY_DIR}/arrow-src" BINARY_DIR "${CMAKE_BINARY_DIR}/arrow-build" CONFIGURE_COMMAND "" diff --git a/cmake/arrow/CMakeLists.txt b/cmake/arrow/CMakeLists.txt index ce6d131c3a..16fab1a568 100644 --- a/cmake/arrow/CMakeLists.txt +++ b/cmake/arrow/CMakeLists.txt @@ -1,9 +1,14 @@ set(CMAKE_SHARED_LIBRARY_SUFFIX .so) set(ARROW_SRCS - ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/builder.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/pretty_print.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_base.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_binary.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_decimal.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_dict.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_nested.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_primitive.cc 
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_adaptive.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_base.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_binary.cc @@ -13,11 +18,14 @@ set(ARROW_SRCS ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_primitive.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_union.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/concatenate.cc - ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/dict_internal.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/data.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/diff.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/util.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/validate.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/buffer.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/chunked_array.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/compare.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/device.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/extension_type.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/memory_pool.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/pretty_print.cc @@ -55,10 +63,13 @@ set(ARROW_SRCS ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/memory.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/testing/util.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/basic_decimal.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/bit_block_counter.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/bit_util.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/bitmap_ops.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/compression.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/cpu_info.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/decimal.cc + ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/future.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/int_util.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/io_util.cc 
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/logging.cc @@ -73,8 +84,8 @@ set(ARROW_SRCS ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/datetime/tz.cpp ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/dictionary.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/feather.cc - ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_integration.cc - ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_internal.cc + # ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_integration.cc + # ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_internal.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_simple.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/message.cc ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/metadata_internal.cc diff --git a/cmake/arrow/config.h b/cmake/arrow/config.h index 2d63504e59..33ccf6f1cd 100644 --- a/cmake/arrow/config.h +++ b/cmake/arrow/config.h @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. -#define ARROW_VERSION_MAJOR 0 -#define ARROW_VERSION_MINOR 16 -#define ARROW_VERSION_PATCH 0 +#define ARROW_VERSION_MAJOR 1 +#define ARROW_VERSION_MINOR 0 +#define ARROW_VERSION_PATCH 1 #define ARROW_VERSION ((ARROW_VERSION_MAJOR * 1000) + ARROW_VERSION_MINOR) * 1000 + ARROW_VERSION_PATCH /* #undef DOUBLE_CONVERSION_HAS_CASE_INSENSIBILITY */ diff --git a/cmake/modules/FindPyArrow.cmake b/cmake/modules/FindPyArrow.cmake index 5b978ea829..83e518ac41 100644 --- a/cmake/modules/FindPyArrow.cmake +++ b/cmake/modules/FindPyArrow.cmake @@ -24,23 +24,11 @@ execute_process( "from __future__ import print_function\ntry: import pyarrow; print(' '.join(pyarrow.get_libraries()), end='')\nexcept:pass" OUTPUT_VARIABLE __pyarrow_libraries) -# And the version -execute_process( - COMMAND "${Python_EXECUTABLE}" -c - "from __future__ import print_function\ntry: import pyarrow; print(pyarrow.__version__, end='')\nexcept:pass" - OUTPUT_VARIABLE __pyarrow_version) - find_path(PYTHON_PYARROW_INCLUDE_DIR 
arrow/python/api.h HINTS "${__pyarrow_path}" "${PYTHON_INCLUDE_PATH}" NO_DEFAULT_PATH) set(PYTHON_PYARROW_LIBRARY_DIR ${__pyarrow_library_dirs}) -# Figure out the major version for the .so/.dylibs -string(REPLACE "." ";" PYARROW_VERSION_LIST ${__pyarrow_version}) -list(GET PYARROW_VERSION_LIST 0 PYARROW_VERSION_MAJOR) -list(GET PYARROW_VERSION_LIST 1 PYARROW_VERSION_MINOR) -list(GET PYARROW_VERSION_LIST 2 PYARROW_VERSION_PATCH) - if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") # windows its just "arrow.dll" set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY "arrow_python") @@ -48,13 +36,13 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Windows") set(PYTHON_PYARROW_LIBRARIES ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY}) elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin") # Link against pre-built libarrow on MacOS - set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python.${PYARROW_VERSION_MINOR}.dylib) - set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow.${PYARROW_VERSION_MINOR}.dylib) + set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python.100.dylib) + set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow.100.dylib) set(PYTHON_PYARROW_LIBRARIES ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY}) else() # linux - set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR}) - set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${CMAKE_SHARED_LIBRARY_PREFIX}arrow${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR}) + set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${CMAKE_SHARED_LIBRARY_PREFIX}arrow${CMAKE_SHARED_LIBRARY_SUFFIX}) 
set(PYTHON_PYARROW_LIBRARIES ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY}) endif() diff --git a/cpp/perspective/CMakeLists.txt b/cpp/perspective/CMakeLists.txt index 19e74d1b47..fbb186001f 100644 --- a/cpp/perspective/CMakeLists.txt +++ b/cpp/perspective/CMakeLists.txt @@ -375,10 +375,6 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD) include_directories( ${Python_INCLUDE_DIRS} ) if(MACOS) - # don't link against build python - # https://blog.tim-smith.us/2015/09/python-extension-modules-os-x/ - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -undefined dynamic_lookup") - # on mac, use the vanilla pybind11 finder find_package(pybind11) if(pybind11_FOUND) @@ -387,6 +383,11 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD) else() # Check if pip installed PyBind is available find_package(Pybind) + if(PYTHON_PYBIND_FOUND) + # Need to add extra flags due to pybind weirdness + # https://github.com/pybind/pybind11/blob/7830e8509f2adc97ce9ee32bf99cd4b82089cc4c/tools/pybind11Tools.cmake#L103 + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -undefined dynamic_lookup") + endif() endif() else() @@ -631,14 +632,25 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD) # `site-packages`, the relative search path should be able to pick # up pyarrow. This is only enabled for MacOS, as `auditwheel` # will not delocate libarrow properly if it is in the rpath. 
- set_property(TARGET psp PROPERTY INSTALL_RPATH ${CMAKE_INSTALL_RPATH} ${module_origin_path} ${module_origin_path}../../pyarrow/ ${PYTHON_PYARROW_LIBRARY_DIR} ${PSP_PYTHON_ARROWINSTALLDIR}) - set_property(TARGET binding PROPERTY INSTALL_RPATH ${CMAKE_INSTALL_RPATH} ${module_origin_path} ${module_origin_path}../../pyarrow/ ${PYTHON_PYARROW_LIBRARY_DIR} ${PSP_PYTHON_ARROWINSTALLDIR}) + # + # We also include the install-time path to pyarrow so that pep-518 + # can build properly + set_property(TARGET psp PROPERTY INSTALL_RPATH ${CMAKE_INSTALL_RPATH} ${module_origin_path} ${module_origin_path}../../pyarrow ${PYTHON_PYARROW_LIBRARY_DIR} ${PSP_PYTHON_ARROWINSTALLDIR}) + set_property(TARGET binding PROPERTY INSTALL_RPATH ${CMAKE_INSTALL_RPATH} ${module_origin_path} ${module_origin_path}../../pyarrow ${PYTHON_PYARROW_LIBRARY_DIR} ${PSP_PYTHON_ARROWINSTALLDIR}) else() target_compile_options(binding PRIVATE -Wdeprecated-declarations) endif() target_link_libraries(psp ${PYTHON_PYARROW_LIBRARIES}) - target_link_libraries(binding ${PYTHON_PYARROW_LIBRARIES}) + target_link_libraries(binding ${PYTHON_PYARROW_LIBRARIES}) + + if(WIN32) + # Don't link + + else() + target_link_libraries(psp ${PYTHON_LIBRARIES}) + target_link_libraries(binding ${PYTHON_LIBRARIES}) + endif() target_link_libraries(psp tbb) target_link_libraries(binding tbb) @@ -710,4 +722,4 @@ if(PSP_BUILD_DOCS) # add_dependencies(docs-html doxygen) endif() -########## +########## \ No newline at end of file diff --git a/cpp/perspective/src/cpp/arrow_loader.cpp b/cpp/perspective/src/cpp/arrow_loader.cpp index 51ba586c99..7533150d09 100644 --- a/cpp/perspective/src/cpp/arrow_loader.cpp +++ b/cpp/perspective/src/cpp/arrow_loader.cpp @@ -58,44 +58,46 @@ namespace apachearrow { ArrowLoader::initialize(const uintptr_t ptr, const uint32_t length) { arrow::io::BufferReader buffer_reader(reinterpret_cast(ptr), length); if (std::memcmp("ARROW1", (const void *)ptr, 6) == 0) { - std::shared_ptr batch_reader; - arrow::Status 
status = arrow::ipc::RecordBatchFileReader::Open(&buffer_reader, &batch_reader); + auto status = arrow::ipc::RecordBatchFileReader::Open(&buffer_reader); if (!status.ok()) { std::stringstream ss; - ss << "Failed to open RecordBatchFileReader: " << status.message() << std::endl; + ss << "Failed to open RecordBatchFileReader: " << status.status().ToString() << std::endl; PSP_COMPLAIN_AND_ABORT(ss.str()); } else { + std::shared_ptr batch_reader = *status; std::vector> batches; auto num_batches = batch_reader->num_record_batches(); for (int i = 0; i < num_batches; ++i) { - std::shared_ptr chunk; - status = batch_reader->ReadRecordBatch(i, &chunk); - if (!status.ok()) { + + auto status2 = batch_reader->ReadRecordBatch(i); + if (!status2.ok()) { PSP_COMPLAIN_AND_ABORT( - "Failed to read file record batch: " + status.message()); + "Failed to read file record batch: " + status2.status().ToString()); } + std::shared_ptr chunk = *status2; batches.push_back(chunk); } - status = arrow::Table::FromRecordBatches(batches, &m_table); - if (!status.ok()) { + auto status3 = arrow::Table::FromRecordBatches(batches); + if (!status3.ok()) { std::stringstream ss; ss << "Failed to create Table from RecordBatches: " - << status.message() << std::endl; + << status3.status().ToString() << std::endl; PSP_COMPLAIN_AND_ABORT(ss.str()); }; + m_table = *status3; }; } else { - std::shared_ptr batch_reader; - arrow::Status status = arrow::ipc::RecordBatchStreamReader::Open(&buffer_reader, &batch_reader); + auto status = arrow::ipc::RecordBatchStreamReader::Open(&buffer_reader); if (!status.ok()) { std::stringstream ss; - ss << "Failed to open RecordBatchStreamReader: " << status.message() << std::endl; + ss << "Failed to open RecordBatchStreamReader: " << status.status().ToString() << std::endl; PSP_COMPLAIN_AND_ABORT(ss.str()); } else { - status = batch_reader->ReadAll(&m_table); - if (!status.ok()) { + auto batch_reader = *status; + auto status5 = batch_reader->ReadAll(&m_table); + if 
(!status5.ok()) { std::stringstream ss; - ss << "Failed to read stream record batch: " << status.message() << std::endl; + ss << "Failed to read stream record batch: " << status5.ToString() << std::endl; PSP_COMPLAIN_AND_ABORT(ss.str()); }; } diff --git a/cpp/perspective/src/cpp/arrow_writer.cpp b/cpp/perspective/src/cpp/arrow_writer.cpp index 043ae11a94..2f2166505e 100644 --- a/cpp/perspective/src/cpp/arrow_writer.cpp +++ b/cpp/perspective/src/cpp/arrow_writer.cpp @@ -263,11 +263,20 @@ namespace apachearrow { auto dictionary_type = arrow::dictionary(arrow::int32(), arrow::utf8()); - std::shared_ptr dictionary_array; - PSP_CHECK_ARROW_STATUS(arrow::DictionaryArray::FromArrays( - dictionary_type, indices_array, values_array, &dictionary_array)); + arrow::Result> result = arrow::DictionaryArray::FromArrays( + dictionary_type, + indices_array, + values_array + ); - return dictionary_array; + if (!result.ok()) { + std::stringstream ss; + ss << "Could not write values for dictionary array: " + << result.status().message() + << std::endl; + PSP_COMPLAIN_AND_ABORT(ss.str()); + } + return result.ValueOrDie(); } } // namespace arrow diff --git a/cpp/perspective/src/cpp/view.cpp b/cpp/perspective/src/cpp/view.cpp index 449ce8fb9d..4e30c57de3 100644 --- a/cpp/perspective/src/cpp/view.cpp +++ b/cpp/perspective/src/cpp/view.cpp @@ -567,21 +567,21 @@ View::data_slice_to_arrow( } std::shared_ptr buffer; - auto allocated = arrow::AllocateResizableBuffer(0, &buffer); + arrow::Result> allocated = arrow::AllocateResizableBuffer(0); if (!allocated.ok()) { std::stringstream ss; - ss << "Failed to allocate buffer: " << allocated.message() << std::endl; + ss << "Failed to allocate buffer: " << allocated.status().message() << std::endl; PSP_COMPLAIN_AND_ABORT(ss.str()); } - + + buffer = allocated.ValueOrDie(); arrow::io::BufferOutputStream sink(buffer); - auto options = arrow::ipc::IpcOptions::Defaults(); // options.allow_64bit = true; // options.write_legacy_ipc_format = true; // 
options.alignment = 64; - auto res = arrow::ipc::RecordBatchStreamWriter::Open(&sink, arrow_schema, options); + auto res = arrow::ipc::NewStreamWriter(&sink, arrow_schema); std::shared_ptr writer = *res; PSP_CHECK_ARROW_STATUS(writer->WriteRecordBatch(*batches)); diff --git a/python/perspective/pyproject.toml b/python/perspective/pyproject.toml index c3cb3c9c5a..8ecbaf4a87 100644 --- a/python/perspective/pyproject.toml +++ b/python/perspective/pyproject.toml @@ -1,3 +1,3 @@ [build-system] # Minimum requirements for the build system to execute. -requires = ["setuptools", "wheel", "backports.shutil_which ; python_version <'3'", "numpy>=1.13.1", "pyarrow>=0.16.0,<1"] +requires = ["setuptools", "wheel", "backports.shutil_which ; python_version <'3'", "numpy>=1.13.1", "pyarrow>=1.0.1,<2"] diff --git a/python/perspective/setup.py b/python/perspective/setup.py index 3ae5f1f9d3..5bbe325739 100644 --- a/python/perspective/setup.py +++ b/python/perspective/setup.py @@ -49,7 +49,7 @@ def which(x): "future>=0.16.0", "numpy>=1.13.1", "pandas>=0.22.0", - "pyarrow>=0.16.0,<1", + "pyarrow>=1.0.1,<2", "python-dateutil>=2.8.0", "six>=1.11.0", "traitlets>=4.3.2",