From 976e94eda343182ef0063f9a43e9df3f67d687fb Mon Sep 17 00:00:00 2001 From: Yaron Gvili Date: Fri, 7 Oct 2022 15:23:29 -0400 Subject: [PATCH 1/3] ARROW-17965: [C++] ExecBatch support for ChunkedArray values --- cpp/src/arrow/compute/exec.cc | 40 +++++++++++++++++++++++------- cpp/src/arrow/compute/exec_test.cc | 26 +++++++++++++++++++ 2 files changed, 57 insertions(+), 9 deletions(-) diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc index 466b3d5dd4a7b..20a3199577874 100644 --- a/cpp/src/arrow/compute/exec.cc +++ b/cpp/src/arrow/compute/exec.cc @@ -90,14 +90,29 @@ void PrintTo(const ExecBatch& batch, std::ostream* os) { if (value.is_scalar()) { *os << "Scalar[" << value.scalar()->ToString() << "]\n"; continue; + } else if (value.is_array() || value.is_chunked_array()) { + PrettyPrintOptions options; + options.skip_new_lines = true; + if (value.is_array()) { + auto array = value.make_array(); + *os << "Array"; + ARROW_CHECK_OK(PrettyPrint(*array, options, os)); + } else { + auto array = value.chunked_array(); + *os << "Chunked Array"; + ARROW_CHECK_OK(PrettyPrint(*array, options, os)); + } + *os << "\n"; + } else if (value.is_chunked_array()) { + auto array = value.chunked_array(); + PrettyPrintOptions options; + options.skip_new_lines = true; + *os << "Chunked Array"; + ARROW_CHECK_OK(PrettyPrint(*array, options, os)); + *os << "\n"; + } else { + ARROW_DCHECK(false); } - - auto array = value.make_array(); - PrettyPrintOptions options; - options.skip_new_lines = true; - *os << "Array"; - ARROW_CHECK_OK(PrettyPrint(*array, options, os)); - *os << "\n"; } } @@ -118,8 +133,15 @@ std::string ExecBatch::ToString() const { ExecBatch ExecBatch::Slice(int64_t offset, int64_t length) const { ExecBatch out = *this; for (auto& value : out.values) { - if (value.is_scalar()) continue; - value = value.array()->Slice(offset, length); + if (value.is_scalar()) { + continue; + } else if (value.is_array()) { + value = value.array()->Slice(offset, length); + } else if (value.is_chunked_array()) { + value = value.chunked_array()->Slice(offset, length); + } else { + ARROW_DCHECK(false); + } } out.length = std::min(length, this->length - offset); return out; diff --git a/cpp/src/arrow/compute/exec_test.cc b/cpp/src/arrow/compute/exec_test.cc index eac18f194d259..7e23f12eff60b 100644 --- a/cpp/src/arrow/compute/exec_test.cc +++ b/cpp/src/arrow/compute/exec_test.cc @@ -55,6 +55,32 @@ using ::arrow::internal::BitmapEquals; using ::arrow::internal::CopyBitmap; using ::arrow::internal::CountSetBits; +TEST(ExecBatch, SliceBasics) { + int64_t length = 4, cut_length = 2, left_length = length - cut_length; + ExecBatch batch{{Int32Scalar(0), + ArrayFromJSON(utf8(), R"(["a", "b", "c", "d"])"), + ChunkedArrayFromJSON(float64(), {"[1.1]", "[2.2]", "[3.3]", "[4.4]"})}, + length}; + std::vector expected_sliced{ + {{Int32Scalar(0), ArrayFromJSON(utf8(), R"(["a", "b"])"), + ChunkedArrayFromJSON(float64(), {"[1.1]", "[2.2]"})}, + cut_length}, + {{Int32Scalar(0), ArrayFromJSON(utf8(), R"(["c", "d"])"), + ChunkedArrayFromJSON(float64(), {"[3.3]", "[4.4]"})}, + left_length} + }; + std::vector actual_sliced = {batch.Slice(0, cut_length), + batch.Slice(cut_length, left_length)}; + for (size_t i = 0; i < expected_sliced.size(); i++) { + ASSERT_EQ(expected_sliced[i].length, actual_sliced[i].length); + ASSERT_EQ(expected_sliced[i].values.size(), actual_sliced[i].values.size()); + for (size_t j = 0; j < expected_sliced[i].values.size(); j++) { + AssertDatumsEqual(expected_sliced[i].values[j], actual_sliced[i].values[j]); + } + ASSERT_EQ(expected_sliced[i].ToString(), actual_sliced[i].ToString()); + } +} + TEST(ExecContext, BasicWorkings) { { ExecContext ctx; From 863d87584eceb5d81aad703f894d060cae90875e Mon Sep 17 00:00:00 2001 From: Yaron Gvili Date: Sat, 8 Oct 2022 02:17:55 -0400 Subject: [PATCH 2/3] requested fixes --- cpp/src/arrow/compute/exec.cc | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc index 20a3199577874..30bf7d9ff122a 100644 --- a/cpp/src/arrow/compute/exec.cc +++ b/cpp/src/arrow/compute/exec.cc @@ -89,7 +89,6 @@ void PrintTo(const ExecBatch& batch, std::ostream* os) { if (value.is_scalar()) { *os << "Scalar[" << value.scalar()->ToString() << "]\n"; - continue; } else if (value.is_array() || value.is_chunked_array()) { PrettyPrintOptions options; options.skip_new_lines = true; @@ -103,13 +102,6 @@ void PrintTo(const ExecBatch& batch, std::ostream* os) { ARROW_CHECK_OK(PrettyPrint(*array, options, os)); } *os << "\n"; - } else if (value.is_chunked_array()) { - auto array = value.chunked_array(); - PrettyPrintOptions options; - options.skip_new_lines = true; - *os << "Chunked Array"; - ARROW_CHECK_OK(PrettyPrint(*array, options, os)); - *os << "\n"; } else { ARROW_DCHECK(false); } @@ -134,7 +126,7 @@ ExecBatch ExecBatch::Slice(int64_t offset, int64_t length) const { ExecBatch out = *this; for (auto& value : out.values) { if (value.is_scalar()) { - continue; + // keep value as is } else if (value.is_array()) { value = value.array()->Slice(offset, length); } else if (value.is_chunked_array()) { From 03771eb43aedfecd16105d28f1dffb0eda96e791 Mon Sep 17 00:00:00 2001 From: Yaron Gvili Date: Sat, 15 Oct 2022 04:11:25 -0400 Subject: [PATCH 3/3] lint --- cpp/src/arrow/compute/exec_test.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/compute/exec_test.cc b/cpp/src/arrow/compute/exec_test.cc index 7e23f12eff60b..813cf8961e4fe 100644 --- a/cpp/src/arrow/compute/exec_test.cc +++ b/cpp/src/arrow/compute/exec_test.cc @@ -57,8 +57,7 @@ using ::arrow::internal::CountSetBits; TEST(ExecBatch, SliceBasics) { int64_t length = 4, cut_length = 2, left_length = length - cut_length; - ExecBatch batch{{Int32Scalar(0), - ArrayFromJSON(utf8(), R"(["a", "b", "c", "d"])"), + ExecBatch batch{{Int32Scalar(0), ArrayFromJSON(utf8(), R"(["a", "b", "c", "d"])"), ChunkedArrayFromJSON(float64(), {"[1.1]", "[2.2]", "[3.3]", "[4.4]"})}, length}; std::vector expected_sliced{ @@ -67,8 +66,7 @@ TEST(ExecBatch, SliceBasics) { cut_length}, {{Int32Scalar(0), ArrayFromJSON(utf8(), R"(["c", "d"])"), ChunkedArrayFromJSON(float64(), {"[3.3]", "[4.4]"})}, - left_length} - }; + left_length}}; std::vector actual_sliced = {batch.Slice(0, cut_length), batch.Slice(cut_length, left_length)}; for (size_t i = 0; i < expected_sliced.size(); i++) {