Skip to content

Commit

Permalink
Upgrade Arrow to 1.0.1
Browse files Browse the repository at this point in the history
  • Loading branch information
texodus committed Sep 25, 2020
1 parent 5757ad1 commit 459c78b
Show file tree
Hide file tree
Showing 10 changed files with 81 additions and 59 deletions.
2 changes: 1 addition & 1 deletion cmake/arrow.txt.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ project(arrow-download NONE)
include(ExternalProject)
ExternalProject_Add(apachearrow
GIT_REPOSITORY https://github.com/apache/arrow.git
GIT_TAG apache-arrow-0.16.0
GIT_TAG apache-arrow-1.0.1
SOURCE_DIR "${CMAKE_BINARY_DIR}/arrow-src"
BINARY_DIR "${CMAKE_BINARY_DIR}/arrow-build"
CONFIGURE_COMMAND ""
Expand Down
19 changes: 15 additions & 4 deletions cmake/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
set(CMAKE_SHARED_LIBRARY_SUFFIX .so)

set(ARROW_SRCS
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/builder.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/pretty_print.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_base.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_binary.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_decimal.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_dict.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_nested.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/array_primitive.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_adaptive.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_base.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_binary.cc
Expand All @@ -13,11 +18,14 @@ set(ARROW_SRCS
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_primitive.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/builder_union.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/concatenate.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/dict_internal.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/data.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/diff.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/util.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/array/validate.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/buffer.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/chunked_array.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/compare.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/device.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/extension_type.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/memory_pool.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/pretty_print.cc
Expand Down Expand Up @@ -55,10 +63,13 @@ set(ARROW_SRCS
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/memory.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/testing/util.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/basic_decimal.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/bit_block_counter.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/bit_util.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/bitmap_ops.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/compression.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/cpu_info.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/decimal.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/future.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/int_util.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/io_util.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/logging.cc
Expand All @@ -73,8 +84,8 @@ set(ARROW_SRCS
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/datetime/tz.cpp
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/dictionary.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/feather.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_integration.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_internal.cc
# ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_integration.cc
# ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_internal.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/json_simple.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/message.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/ipc/metadata_internal.cc
Expand Down
6 changes: 3 additions & 3 deletions cmake/arrow/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
// specific language governing permissions and limitations
// under the License.

#define ARROW_VERSION_MAJOR 0
#define ARROW_VERSION_MINOR 16
#define ARROW_VERSION_PATCH 0
#define ARROW_VERSION_MAJOR 1
#define ARROW_VERSION_MINOR 0
#define ARROW_VERSION_PATCH 1
#define ARROW_VERSION ((ARROW_VERSION_MAJOR * 1000) + ARROW_VERSION_MINOR) * 1000 + ARROW_VERSION_PATCH

/* #undef DOUBLE_CONVERSION_HAS_CASE_INSENSIBILITY */
Expand Down
20 changes: 4 additions & 16 deletions cmake/modules/FindPyArrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -24,37 +24,25 @@ execute_process(
"from __future__ import print_function\ntry: import pyarrow; print(' '.join(pyarrow.get_libraries()), end='')\nexcept:pass"
OUTPUT_VARIABLE __pyarrow_libraries)

# And the version
execute_process(
COMMAND "${Python_EXECUTABLE}" -c
"from __future__ import print_function\ntry: import pyarrow; print(pyarrow.__version__, end='')\nexcept:pass"
OUTPUT_VARIABLE __pyarrow_version)

find_path(PYTHON_PYARROW_INCLUDE_DIR arrow/python/api.h
HINTS "${__pyarrow_path}" "${PYTHON_INCLUDE_PATH}" NO_DEFAULT_PATH)

set(PYTHON_PYARROW_LIBRARY_DIR ${__pyarrow_library_dirs})

# Figure out the major version for the .so/.dylibs
string(REPLACE "." ";" PYARROW_VERSION_LIST ${__pyarrow_version})
list(GET PYARROW_VERSION_LIST 0 PYARROW_VERSION_MAJOR)
list(GET PYARROW_VERSION_LIST 1 PYARROW_VERSION_MINOR)
list(GET PYARROW_VERSION_LIST 2 PYARROW_VERSION_PATCH)

if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
# On Windows it's just "arrow.dll"
set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY "arrow_python")
set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY "arrow")
set(PYTHON_PYARROW_LIBRARIES ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY})
elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
# Link against pre-built libarrow on MacOS
set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python.${PYARROW_VERSION_MINOR}.dylib)
set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow.${PYARROW_VERSION_MINOR}.dylib)
set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python.100.dylib)
set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${PYTHON_PYARROW_LIBRARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}arrow.100.dylib)
set(PYTHON_PYARROW_LIBRARIES ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY})
else()
# linux
set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR})
set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${CMAKE_SHARED_LIBRARY_PREFIX}arrow${CMAKE_SHARED_LIBRARY_SUFFIX}.${PYARROW_VERSION_MINOR})
set(PYTHON_PYARROW_PYTHON_SHARED_LIBRARY ${CMAKE_SHARED_LIBRARY_PREFIX}arrow_python${CMAKE_SHARED_LIBRARY_SUFFIX})
set(PYTHON_PYARROW_ARROW_SHARED_LIBRARY ${CMAKE_SHARED_LIBRARY_PREFIX}arrow${CMAKE_SHARED_LIBRARY_SUFFIX})
set(PYTHON_PYARROW_LIBRARIES ${PYTHON_PYARROW_PYTHON_SHARED_LIBRARY} ${PYTHON_PYARROW_ARROW_SHARED_LIBRARY})
endif()

Expand Down
28 changes: 20 additions & 8 deletions cpp/perspective/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -375,10 +375,6 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD)
include_directories( ${Python_INCLUDE_DIRS} )

if(MACOS)
# don't link against build python
# https://blog.tim-smith.us/2015/09/python-extension-modules-os-x/
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -undefined dynamic_lookup")

# on mac, use the vanilla pybind11 finder
find_package(pybind11)
if(pybind11_FOUND)
Expand All @@ -387,6 +383,11 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD)
else()
# Check if pip installed PyBind is available
find_package(Pybind)
if(PYTHON_PYBIND_FOUND)
# Need to add extra flags due to pybind weirdness
# https://github.com/pybind/pybind11/blob/7830e8509f2adc97ce9ee32bf99cd4b82089cc4c/tools/pybind11Tools.cmake#L103
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -undefined dynamic_lookup")
endif()
endif()

else()
Expand Down Expand Up @@ -631,14 +632,25 @@ elseif(PSP_CPP_BUILD OR PSP_PYTHON_BUILD)
# `site-packages`, the relative search path should be able to pick
# up pyarrow. This is only enabled for MacOS, as `auditwheel`
# will not delocate libarrow properly if it is in the rpath.
set_property(TARGET psp PROPERTY INSTALL_RPATH ${CMAKE_INSTALL_RPATH} ${module_origin_path} ${module_origin_path}../../pyarrow/ ${PYTHON_PYARROW_LIBRARY_DIR} ${PSP_PYTHON_ARROWINSTALLDIR})
set_property(TARGET binding PROPERTY INSTALL_RPATH ${CMAKE_INSTALL_RPATH} ${module_origin_path} ${module_origin_path}../../pyarrow/ ${PYTHON_PYARROW_LIBRARY_DIR} ${PSP_PYTHON_ARROWINSTALLDIR})
#
# We also include the install-time path to pyarrow so that pep-518
# can build properly
set_property(TARGET psp PROPERTY INSTALL_RPATH ${CMAKE_INSTALL_RPATH} ${module_origin_path} ${module_origin_path}../../pyarrow ${PYTHON_PYARROW_LIBRARY_DIR} ${PSP_PYTHON_ARROWINSTALLDIR})
set_property(TARGET binding PROPERTY INSTALL_RPATH ${CMAKE_INSTALL_RPATH} ${module_origin_path} ${module_origin_path}../../pyarrow ${PYTHON_PYARROW_LIBRARY_DIR} ${PSP_PYTHON_ARROWINSTALLDIR})
else()
target_compile_options(binding PRIVATE -Wdeprecated-declarations)
endif()

target_link_libraries(psp ${PYTHON_PYARROW_LIBRARIES})
target_link_libraries(binding ${PYTHON_PYARROW_LIBRARIES})
target_link_libraries(binding ${PYTHON_PYARROW_LIBRARIES})

if(WIN32)
# Don't link against ${PYTHON_LIBRARIES} on Windows

else()
target_link_libraries(psp ${PYTHON_LIBRARIES})
target_link_libraries(binding ${PYTHON_LIBRARIES})
endif()

target_link_libraries(psp tbb)
target_link_libraries(binding tbb)
Expand Down Expand Up @@ -710,4 +722,4 @@ if(PSP_BUILD_DOCS)
# add_dependencies(docs-html doxygen)

endif()
##########
##########
34 changes: 18 additions & 16 deletions cpp/perspective/src/cpp/arrow_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,44 +58,46 @@ namespace apachearrow {
ArrowLoader::initialize(const uintptr_t ptr, const uint32_t length) {
arrow::io::BufferReader buffer_reader(reinterpret_cast<const std::uint8_t*>(ptr), length);
if (std::memcmp("ARROW1", (const void *)ptr, 6) == 0) {
std::shared_ptr<arrow::ipc::RecordBatchFileReader> batch_reader;
arrow::Status status = arrow::ipc::RecordBatchFileReader::Open(&buffer_reader, &batch_reader);
auto status = arrow::ipc::RecordBatchFileReader::Open(&buffer_reader);
if (!status.ok()) {
std::stringstream ss;
ss << "Failed to open RecordBatchFileReader: " << status.message() << std::endl;
ss << "Failed to open RecordBatchFileReader: " << status.status().ToString() << std::endl;
PSP_COMPLAIN_AND_ABORT(ss.str());
} else {
std::shared_ptr<arrow::ipc::RecordBatchFileReader> batch_reader = *status;
std::vector<std::shared_ptr<arrow::RecordBatch>> batches;
auto num_batches = batch_reader->num_record_batches();
for (int i = 0; i < num_batches; ++i) {
std::shared_ptr<arrow::RecordBatch> chunk;
status = batch_reader->ReadRecordBatch(i, &chunk);
if (!status.ok()) {

auto status2 = batch_reader->ReadRecordBatch(i);
if (!status2.ok()) {
PSP_COMPLAIN_AND_ABORT(
"Failed to read file record batch: " + status.message());
"Failed to read file record batch: " + status2.status().ToString());
}
std::shared_ptr<arrow::RecordBatch> chunk = *status2;
batches.push_back(chunk);
}
status = arrow::Table::FromRecordBatches(batches, &m_table);
if (!status.ok()) {
auto status3 = arrow::Table::FromRecordBatches(batches);
if (!status3.ok()) {
std::stringstream ss;
ss << "Failed to create Table from RecordBatches: "
<< status.message() << std::endl;
<< status3.status().ToString() << std::endl;
PSP_COMPLAIN_AND_ABORT(ss.str());
};
m_table = *status3;
};
} else {
std::shared_ptr<arrow::ipc::RecordBatchReader> batch_reader;
arrow::Status status = arrow::ipc::RecordBatchStreamReader::Open(&buffer_reader, &batch_reader);
auto status = arrow::ipc::RecordBatchStreamReader::Open(&buffer_reader);
if (!status.ok()) {
std::stringstream ss;
ss << "Failed to open RecordBatchStreamReader: " << status.message() << std::endl;
ss << "Failed to open RecordBatchStreamReader: " << status.status().ToString() << std::endl;
PSP_COMPLAIN_AND_ABORT(ss.str());
} else {
status = batch_reader->ReadAll(&m_table);
if (!status.ok()) {
auto batch_reader = *status;
auto status5 = batch_reader->ReadAll(&m_table);
if (!status5.ok()) {
std::stringstream ss;
ss << "Failed to read stream record batch: " << status.message() << std::endl;
ss << "Failed to read stream record batch: " << status5.ToString() << std::endl;
PSP_COMPLAIN_AND_ABORT(ss.str());
};
}
Expand Down
17 changes: 13 additions & 4 deletions cpp/perspective/src/cpp/arrow_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,11 +263,20 @@ namespace apachearrow {
auto dictionary_type =
arrow::dictionary(arrow::int32(), arrow::utf8());

std::shared_ptr<arrow::Array> dictionary_array;
PSP_CHECK_ARROW_STATUS(arrow::DictionaryArray::FromArrays(
dictionary_type, indices_array, values_array, &dictionary_array));
arrow::Result<std::shared_ptr<arrow::Array>> result = arrow::DictionaryArray::FromArrays(
dictionary_type,
indices_array,
values_array
);

return dictionary_array;
if (!result.ok()) {
std::stringstream ss;
ss << "Could not write values for dictionary array: "
<< result.status().message()
<< std::endl;
PSP_COMPLAIN_AND_ABORT(ss.str());
}
return result.ValueOrDie();
}

} // namespace arrow
Expand Down
10 changes: 5 additions & 5 deletions cpp/perspective/src/cpp/view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -567,21 +567,21 @@ View<CTX_T>::data_slice_to_arrow(
}

std::shared_ptr<arrow::ResizableBuffer> buffer;
auto allocated = arrow::AllocateResizableBuffer(0, &buffer);
arrow::Result<std::shared_ptr<arrow::ResizableBuffer>> allocated = arrow::AllocateResizableBuffer(0);
if (!allocated.ok()) {
std::stringstream ss;
ss << "Failed to allocate buffer: " << allocated.message() << std::endl;
ss << "Failed to allocate buffer: " << allocated.status().message() << std::endl;
PSP_COMPLAIN_AND_ABORT(ss.str());
}


buffer = allocated.ValueOrDie();
arrow::io::BufferOutputStream sink(buffer);

auto options = arrow::ipc::IpcOptions::Defaults();
// options.allow_64bit = true;
// options.write_legacy_ipc_format = true;
// options.alignment = 64;

auto res = arrow::ipc::RecordBatchStreamWriter::Open(&sink, arrow_schema, options);
auto res = arrow::ipc::NewStreamWriter(&sink, arrow_schema);
std::shared_ptr<arrow::ipc::RecordBatchWriter> writer = *res;

PSP_CHECK_ARROW_STATUS(writer->WriteRecordBatch(*batches));
Expand Down
2 changes: 1 addition & 1 deletion python/perspective/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[build-system]
# Minimum requirements for the build system to execute.
requires = ["setuptools", "wheel", "backports.shutil_which ; python_version <'3'", "numpy>=1.13.1", "pyarrow>=0.16.0,<1"]
# pyarrow is restricted to the 1.x series (>=1.0.1,<2), matching install_requires in setup.py.
requires = ["setuptools", "wheel", "backports.shutil_which ; python_version <'3'", "numpy>=1.13.1", "pyarrow>=1.0.1,<2"]
2 changes: 1 addition & 1 deletion python/perspective/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def which(x):
"future>=0.16.0",
"numpy>=1.13.1",
"pandas>=0.22.0",
"pyarrow>=0.16.0,<1",
"pyarrow>=1.0.1,<2",
"python-dateutil>=2.8.0",
"six>=1.11.0",
"traitlets>=4.3.2",
Expand Down

0 comments on commit 459c78b

Please sign in to comment.