Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into recordbatchreader-f…
Browse files Browse the repository at this point in the history
…rom-stream
  • Loading branch information
jorisvandenbossche committed Jan 8, 2024
2 parents 8e71540 + de3130e commit cb7fe78
Show file tree
Hide file tree
Showing 493 changed files with 10,855 additions and 7,630 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/archery.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,9 @@ jobs:
- name: Install pygit2 binary wheel
run: pip install pygit2 --only-binary pygit2
- name: Install Archery, Crossbow- and Test Dependencies
run: pip install pytest responses -e dev/archery[all]
run: |
pip install -e dev/archery[all]
pip install -r dev/archery/requirements-test.txt
- name: Archery Unittests
working-directory: dev/archery
run: pytest -v archery
Expand Down
2 changes: 0 additions & 2 deletions .github/workflows/cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -340,8 +340,6 @@ jobs:
fail-fast: false
matrix:
include:
- msystem_lower: mingw32
msystem_upper: MINGW32
- msystem_lower: mingw64
msystem_upper: MINGW64
- msystem_lower: clang64
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ruby.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ jobs:
fail-fast: false
matrix:
ubuntu:
- 20.04
- 22.04
env:
UBUNTU: ${{ matrix.ubuntu }}
steps:
Expand Down
2 changes: 1 addition & 1 deletion NOTICE.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Apache Arrow
Copyright 2016-2019 The Apache Software Foundation
Copyright 2016-2024 The Apache Software Foundation

This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
Expand Down
6 changes: 4 additions & 2 deletions c_glib/arrow-glib/scalar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
#include <arrow-glib/interval.hpp>
#include <arrow-glib/scalar.hpp>

#include <arrow/compute/cast.h>

G_BEGIN_DECLS

/**
Expand Down Expand Up @@ -385,9 +387,9 @@ garrow_scalar_cast(GArrowScalar *scalar,
{
const auto arrow_scalar = garrow_scalar_get_raw(scalar);
const auto arrow_data_type = garrow_data_type_get_raw(data_type);
auto arrow_casted_scalar_result = arrow_scalar->CastTo(arrow_data_type);
auto arrow_casted_scalar_result = arrow::compute::Cast(arrow_scalar, arrow_data_type);
if (garrow::check(error, arrow_casted_scalar_result, "[scalar][cast]")) {
auto arrow_casted_scalar = *arrow_casted_scalar_result;
auto arrow_casted_scalar = (*arrow_casted_scalar_result).scalar();
return garrow_scalar_new_raw(&arrow_casted_scalar,
"scalar", &arrow_casted_scalar,
"data-type", data_type,
Expand Down
2 changes: 1 addition & 1 deletion ci/scripts/PKGBUILD
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
_realname=arrow
pkgbase=mingw-w64-${_realname}
pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}"
pkgver=14.0.1.9000
pkgver=14.0.2.9000
pkgrel=8000
pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)"
arch=("any")
Expand Down
2 changes: 1 addition & 1 deletion ci/scripts/integration_substrait.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ set -e
echo "Substrait Integration Tests"
echo "Validating imports"
python -c "import pyarrow.substrait"
python -c "from substrait_consumer.consumers import AceroConsumer"
python -c "from substrait_consumer.consumers.acero_consumer import AceroConsumer"

echo "Executing pytest"
cd consumer-testing
Expand Down
2 changes: 2 additions & 0 deletions ci/scripts/python_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ export ARROW_DEBUG_MEMORY_POOL=trap
: ${PYARROW_TEST_HDFS:=${ARROW_HDFS:-ON}}
: ${PYARROW_TEST_ORC:=${ARROW_ORC:-ON}}
: ${PYARROW_TEST_PARQUET:=${ARROW_PARQUET:-ON}}
: ${PYARROW_TEST_PARQUET_ENCRYPTION:=${PARQUET_REQUIRE_ENCRYPTION:-ON}}
: ${PYARROW_TEST_S3:=${ARROW_S3:-ON}}

export PYARROW_TEST_ACERO
Expand All @@ -56,6 +57,7 @@ export PYARROW_TEST_GCS
export PYARROW_TEST_HDFS
export PYARROW_TEST_ORC
export PYARROW_TEST_PARQUET
export PYARROW_TEST_PARQUET_ENCRYPTION
export PYARROW_TEST_S3

# Testing PyArrow
Expand Down
1 change: 1 addition & 0 deletions ci/scripts/python_wheel_unix_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ export PYARROW_TEST_HDFS=ON
export PYARROW_TEST_ORC=ON
export PYARROW_TEST_PANDAS=ON
export PYARROW_TEST_PARQUET=ON
export PYARROW_TEST_PARQUET_ENCRYPTION=ON
export PYARROW_TEST_SUBSTRAIT=${ARROW_SUBSTRAIT}
export PYARROW_TEST_S3=${ARROW_S3}
export PYARROW_TEST_TENSORFLOW=ON
Expand Down
1 change: 1 addition & 0 deletions ci/scripts/python_wheel_windows_test.bat
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ set PYARROW_TEST_GCS=ON
set PYARROW_TEST_HDFS=ON
set PYARROW_TEST_ORC=OFF
set PYARROW_TEST_PARQUET=ON
set PYARROW_TEST_PARQUET_ENCRYPTION=ON
set PYARROW_TEST_SUBSTRAIT=ON
set PYARROW_TEST_S3=OFF
set PYARROW_TEST_TENSORFLOW=ON
Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake_modules/FindLLVMAlt.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ if(LLVM_FOUND)
debuginfodwarf
ipo
linker
mcjit
native
orcjit
target)
if(LLVM_VERSION_MAJOR GREATER_EQUAL 14)
list(APPEND LLVM_TARGET_COMPONENTS passes)
Expand Down
10 changes: 8 additions & 2 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1015,6 +1015,10 @@ else()
endif()

include(FetchContent)
set(FC_DECLARE_COMMON_OPTIONS)
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.28)
list(APPEND FC_DECLARE_COMMON_OPTIONS EXCLUDE_FROM_ALL TRUE)
endif()

macro(prepare_fetchcontent)
set(BUILD_SHARED_LIBS OFF)
Expand Down Expand Up @@ -2146,6 +2150,9 @@ function(build_gtest)
message(STATUS "Building gtest from source")
set(GTEST_VENDORED TRUE)
fetchcontent_declare(googletest
# We should not specify "EXCLUDE_FROM_ALL TRUE" here.
# Because we install GTest with custom path.
# ${FC_DECLARE_COMMON_OPTIONS}
URL ${GTEST_SOURCE_URL}
URL_HASH "SHA256=${ARROW_GTEST_BUILD_SHA256_CHECKSUM}")
prepare_fetchcontent()
Expand Down Expand Up @@ -5096,8 +5103,7 @@ function(build_azure_sdk)
endif()
message(STATUS "Building Azure SDK for C++ from source")
fetchcontent_declare(azure_sdk
# EXCLUDE_FROM_ALL is available since CMake 3.28
# EXCLUDE_FROM_ALL TRUE
${FC_DECLARE_COMMON_OPTIONS}
URL ${ARROW_AZURE_SDK_URL}
URL_HASH "SHA256=${ARROW_AZURE_SDK_BUILD_SHA256_CHECKSUM}")
prepare_fetchcontent()
Expand Down
2 changes: 1 addition & 1 deletion cpp/examples/arrow/compute_and_write_csv_example.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

#include <arrow/api.h>
#include <arrow/compute/api_aggregate.h>
#include <arrow/compute/api.h>
#include <arrow/csv/api.h>
#include <arrow/csv/writer.h>
#include <arrow/io/api.h>
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ set(ARROW_SRCS
type_traits.cc
visitor.cc
c/bridge.cc
c/dlpack.cc
io/buffered.cc
io/caching.cc
io/compressed.cc
Expand Down Expand Up @@ -505,8 +506,8 @@ if(ARROW_FILESYSTEM)
filesystem/util_internal.cc)

if(ARROW_AZURE)
list(APPEND ARROW_SRCS filesystem/azurefs.cc filesystem/azurefs_internal.cc)
set_source_files_properties(filesystem/azurefs.cc filesystem/azurefs_internal.cc
list(APPEND ARROW_SRCS filesystem/azurefs.cc)
set_source_files_properties(filesystem/azurefs.cc
PROPERTIES SKIP_PRECOMPILE_HEADERS ON
SKIP_UNITY_BUILD_INCLUSION ON)
endif()
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/acero/aggregate_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "arrow/acero/exec_plan.h"
#include "arrow/acero/options.h"
#include "arrow/compute/exec.h"
#include "arrow/compute/function.h"
#include "arrow/compute/registry.h"
#include "arrow/compute/row/grouper.h"
#include "arrow/datum.h"
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/acero/scalar_aggregate_node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "arrow/acero/options.h"
#include "arrow/acero/util.h"
#include "arrow/compute/exec.h"
#include "arrow/compute/function.h"
#include "arrow/compute/registry.h"
#include "arrow/compute/row/grouper.h"
#include "arrow/datum.h"
Expand Down
22 changes: 10 additions & 12 deletions cpp/src/arrow/array/array_nested.cc
Original file line number Diff line number Diff line change
Expand Up @@ -287,10 +287,8 @@ Result<std::shared_ptr<Array>> FlattenListViewArray(const ListViewArrayT& list_v
const auto* sizes = list_view_array.data()->template GetValues<offset_type>(2);

auto is_null_or_empty = [&](int64_t i) {
if constexpr (HasNulls) {
if (!bit_util::GetBit(validity, list_view_array_offset + i)) {
return true;
}
if (HasNulls && !bit_util::GetBit(validity, list_view_array_offset + i)) {
return true;
}
return sizes[i] == 0;
};
Expand Down Expand Up @@ -896,7 +894,8 @@ const std::shared_ptr<DataType>& FixedSizeListArray::value_type() const {
const std::shared_ptr<Array>& FixedSizeListArray::values() const { return values_; }

Result<std::shared_ptr<Array>> FixedSizeListArray::FromArrays(
const std::shared_ptr<Array>& values, int32_t list_size) {
const std::shared_ptr<Array>& values, int32_t list_size,
std::shared_ptr<Buffer> null_bitmap, int64_t null_count) {
if (list_size <= 0) {
return Status::Invalid("list_size needs to be a strict positive integer");
}
Expand All @@ -907,14 +906,14 @@ Result<std::shared_ptr<Array>> FixedSizeListArray::FromArrays(
}
int64_t length = values->length() / list_size;
auto list_type = std::make_shared<FixedSizeListType>(values->type(), list_size);
std::shared_ptr<Buffer> validity_buf;

return std::make_shared<FixedSizeListArray>(list_type, length, values, validity_buf,
/*null_count=*/0, /*offset=*/0);
return std::make_shared<FixedSizeListArray>(list_type, length, values, null_bitmap,
null_count);
}

Result<std::shared_ptr<Array>> FixedSizeListArray::FromArrays(
const std::shared_ptr<Array>& values, std::shared_ptr<DataType> type) {
const std::shared_ptr<Array>& values, std::shared_ptr<DataType> type,
std::shared_ptr<Buffer> null_bitmap, int64_t null_count) {
if (type->id() != Type::FIXED_SIZE_LIST) {
return Status::TypeError("Expected fixed size list type, got ", type->ToString());
}
Expand All @@ -928,10 +927,9 @@ Result<std::shared_ptr<Array>> FixedSizeListArray::FromArrays(
"The length of the values Array needs to be a multiple of the list size");
}
int64_t length = values->length() / list_type.list_size();
std::shared_ptr<Buffer> validity_buf;

return std::make_shared<FixedSizeListArray>(type, length, values, validity_buf,
/*null_count=*/0, /*offset=*/0);
return std::make_shared<FixedSizeListArray>(type, length, values, null_bitmap,
null_count);
}

Result<std::shared_ptr<Array>> FixedSizeListArray::Flatten(
Expand Down
16 changes: 12 additions & 4 deletions cpp/src/arrow/array/array_nested.h
Original file line number Diff line number Diff line change
Expand Up @@ -599,17 +599,25 @@ class ARROW_EXPORT FixedSizeListArray : public Array {
///
/// \param[in] values Array containing list values
/// \param[in] list_size The fixed length of each list
/// \param[in] null_bitmap Optional validity bitmap
/// \param[in] null_count Optional null count in null_bitmap
/// \return Will have length equal to values.length() / list_size
static Result<std::shared_ptr<Array>> FromArrays(const std::shared_ptr<Array>& values,
int32_t list_size);
static Result<std::shared_ptr<Array>> FromArrays(
const std::shared_ptr<Array>& values, int32_t list_size,
std::shared_ptr<Buffer> null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount);

/// \brief Construct FixedSizeListArray from child value array and type
///
/// \param[in] values Array containing list values
/// \param[in] type The fixed sized list type
/// \param[in] null_bitmap Optional validity bitmap
/// \param[in] null_count Optional null count in null_bitmap
/// \return Will have length equal to values.length() / type.list_size()
static Result<std::shared_ptr<Array>> FromArrays(const std::shared_ptr<Array>& values,
std::shared_ptr<DataType> type);
static Result<std::shared_ptr<Array>> FromArrays(
const std::shared_ptr<Array>& values, std::shared_ptr<DataType> type,
std::shared_ptr<Buffer> null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount);

protected:
void SetData(const std::shared_ptr<ArrayData>& data);
Expand Down
33 changes: 33 additions & 0 deletions cpp/src/arrow/array/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#pragma once

#include <atomic> // IWYU pragma: export
#include <cassert>
#include <cstdint>
#include <memory>
#include <utility>
Expand Down Expand Up @@ -438,6 +439,38 @@ struct ARROW_EXPORT ArraySpan {
return GetValues<T>(i, this->offset);
}

/// \brief View a typed window of one of this array's buffers as a read-only span
///
/// The returned span starts `offset` values of type T into buffer `i` and
/// covers `length` values.
///
/// \param i The buffer index
/// \param length The number of typed values the returned span covers
/// \pre i > 0
/// \pre offset + length <= the number of whole T values held by buffer `i`
///      (both preconditions are only verified through assert)
/// \return A span<const T> of the requested length
template <typename T>
util::span<const T> GetSpan(int i, int64_t length) const {
  // Number of whole T values that fit in the buffer; only needed by the assert.
  const int64_t num_values = buffers[i].size / static_cast<int64_t>(sizeof(T));
  assert(i > 0 && length + this->offset <= num_values);
  // Keeps builds that compile assert() away free of unused-variable warnings.
  ARROW_UNUSED(num_values);
  const T* first = buffers[i].data_as<T>() + this->offset;
  return util::span<const T>(first, length);
}

/// \brief View a typed window of one of this array's buffers as a writable span
///
/// The returned span starts `offset` values of type T into buffer `i` and
/// covers `length` values.
///
/// \param i The buffer index
/// \param length The number of typed values the returned span covers
/// \pre i > 0
/// \pre offset + length <= the number of whole T values held by buffer `i`
///      (both preconditions are only verified through assert)
/// \return A span<T> of the requested length
template <typename T>
util::span<T> GetSpan(int i, int64_t length) {
  // Number of whole T values that fit in the buffer; only needed by the assert.
  const int64_t num_values = buffers[i].size / static_cast<int64_t>(sizeof(T));
  assert(i > 0 && length + this->offset <= num_values);
  // Keeps builds that compile assert() away free of unused-variable warnings.
  ARROW_UNUSED(num_values);
  T* first = buffers[i].mutable_data_as<T>() + this->offset;
  return util::span<T>(first, length);
}

inline bool IsNull(int64_t i) const { return !IsValid(i); }

inline bool IsValid(int64_t i) const {
Expand Down
13 changes: 13 additions & 0 deletions cpp/src/arrow/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
#include "arrow/util/span.h"
#include "arrow/util/visibility.h"

namespace arrow {
Expand Down Expand Up @@ -233,6 +234,12 @@ class ARROW_EXPORT Buffer {
return reinterpret_cast<const T*>(data());
}

/// \brief Return the buffer's data as a read-only span of T
///
/// The span covers size() / sizeof(T) elements, i.e. any trailing bytes that
/// do not make up a whole T are left out.
template <typename T>
util::span<const T> span_as() const {
  const auto num_elements = static_cast<size_t>(size() / sizeof(T));
  return util::span(data_as<T>(), num_elements);
}

/// \brief Return a writable pointer to the buffer's data
///
/// The buffer has to be a mutable CPU buffer (`is_cpu()` and `is_mutable()`
Expand Down Expand Up @@ -260,6 +267,12 @@ class ARROW_EXPORT Buffer {
return reinterpret_cast<T*>(mutable_data());
}

/// \brief Return the buffer's mutable data as a writable span of T
///
/// The span covers size() / sizeof(T) elements, i.e. any trailing bytes that
/// do not make up a whole T are left out.
template <typename T>
util::span<T> mutable_span_as() const {
  const auto num_elements = static_cast<size_t>(size() / sizeof(T));
  return util::span(mutable_data_as<T>(), num_elements);
}

/// \brief Return the device address of the buffer's data
uintptr_t address() const { return reinterpret_cast<uintptr_t>(data_); }

Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/c/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# under the License.

add_arrow_test(bridge_test PREFIX "arrow-c")
add_arrow_test(dlpack_test)

add_arrow_benchmark(bridge_benchmark)

Expand Down
Loading

0 comments on commit cb7fe78

Please sign in to comment.