diff --git a/cpp/src/arrow/record_batch_test.cc b/cpp/src/arrow/record_batch_test.cc index 05a20aa487abc..db68a9a93790d 100644 --- a/cpp/src/arrow/record_batch_test.cc +++ b/cpp/src/arrow/record_batch_test.cc @@ -705,17 +705,12 @@ TEST_F(TestRecordBatch, ToTensorSupportedNaN) { std::vector shape = {9, 2}; const int64_t f32_size = sizeof(float); std::vector f_strides = {f32_size, f32_size * shape[0]}; - std::vector f_values = { - static_cast(NAN), 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, - static_cast(NAN), 60, 70, 80, 90}; - auto data = Buffer::Wrap(f_values); - - std::shared_ptr tensor_expected; - ASSERT_OK_AND_ASSIGN(tensor_expected, Tensor::Make(float32(), data, shape, f_strides)); + std::shared_ptr tensor_expected = TensorFromJSON( + float32(), "[NaN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, NaN, 60, 70, 80, 90]", + shape, f_strides); EXPECT_FALSE(tensor_expected->Equals(*tensor)); EXPECT_TRUE(tensor_expected->Equals(*tensor, EqualOptions().nans_equal(true))); - CheckTensor(tensor, 18, shape, f_strides); } @@ -752,15 +747,11 @@ TYPED_TEST_P(TestBatchToTensor, SupportedTypes) { std::vector shape = {9, 3}; std::vector f_strides = {unit_size, unit_size * shape[0]}; - std::vector f_values = {1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 20, 30, 40, 50, 60, 70, 80, 90, - 100, 100, 100, 100, 100, 100, 100, 100, 100}; - auto data = Buffer::Wrap(f_values); - - std::shared_ptr tensor_expected; - ASSERT_OK_AND_ASSIGN( - tensor_expected, - Tensor::Make(TypeTraits::type_singleton(), data, shape, f_strides)); + std::shared_ptr tensor_expected = TensorFromJSON( + TypeTraits::type_singleton(), + "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 60, 70, " + "80, 90, 100, 100, 100, 100, 100, 100, 100, 100, 100]", + shape, f_strides); EXPECT_TRUE(tensor_expected->Equals(*tensor)); CheckTensor(tensor, 27, shape, f_strides); @@ -773,15 +764,11 @@ TYPED_TEST_P(TestBatchToTensor, SupportedTypes) { std::vector shape_sliced = {8, 3}; std::vector f_strides_sliced = {unit_size, unit_size * shape_sliced[0]}; - std::vector f_values_sliced = {2, 3, 4, 5, 6, 7, 8, 9, - 20, 30, 40, 50, 60, 70, 80, 90, - 100, 100, 100, 100, 100, 100, 100, 100}; - auto data_sliced = Buffer::Wrap(f_values_sliced); - - std::shared_ptr tensor_expected_sliced; - ASSERT_OK_AND_ASSIGN(tensor_expected_sliced, - Tensor::Make(TypeTraits::type_singleton(), data_sliced, - shape_sliced, f_strides_sliced)); + std::shared_ptr tensor_expected_sliced = + TensorFromJSON(TypeTraits::type_singleton(), + "[2, 3, 4, 5, 6, 7, 8, 9, 20, 30, 40, 50, 60, " + "70, 80, 90, 100, 100, 100, 100, 100, 100, 100, 100]", + shape_sliced, f_strides_sliced); EXPECT_TRUE(tensor_expected_sliced->Equals(*tensor_sliced)); CheckTensor(tensor_expected_sliced, 24, shape_sliced, f_strides_sliced); @@ -793,15 +780,10 @@ TYPED_TEST_P(TestBatchToTensor, SupportedTypes) { std::vector shape_sliced_1 = {5, 3}; std::vector f_strides_sliced_1 = {unit_size, unit_size * shape_sliced_1[0]}; - std::vector f_values_sliced_1 = { - 2, 3, 4, 5, 6, 20, 30, 40, 50, 60, 100, 100, 100, 100, 100, - }; - auto data_sliced_1 = Buffer::Wrap(f_values_sliced_1); - - std::shared_ptr tensor_expected_sliced_1; - ASSERT_OK_AND_ASSIGN(tensor_expected_sliced_1, - Tensor::Make(TypeTraits::type_singleton(), data_sliced_1, - shape_sliced_1, f_strides_sliced_1)); + std::shared_ptr tensor_expected_sliced_1 = + TensorFromJSON(TypeTraits::type_singleton(), + "[2, 3, 4, 5, 6, 20, 30, 40, 50, 60, 100, 100, 100, 100, 100]", + shape_sliced_1, f_strides_sliced_1); EXPECT_TRUE(tensor_expected_sliced_1->Equals(*tensor_sliced_1)); CheckTensor(tensor_expected_sliced_1, 15, shape_sliced_1, f_strides_sliced_1); diff --git a/cpp/src/arrow/testing/gtest_util.cc b/cpp/src/arrow/testing/gtest_util.cc index 37865948882da..95de16c715f19 100644 --- a/cpp/src/arrow/testing/gtest_util.cc +++ b/cpp/src/arrow/testing/gtest_util.cc @@ -50,9 +50,11 @@ #include "arrow/compute/api_vector.h" #include "arrow/datum.h" #include "arrow/ipc/json_simple.h" +#include "arrow/json/rapidjson_defs.h" // IWYU pragma: keep #include "arrow/pretty_print.h" #include "arrow/status.h" #include "arrow/table.h" +#include "arrow/tensor.h" #include "arrow/type.h" #include "arrow/util/checked_cast.h" #include "arrow/util/config.h" @@ -62,6 +64,10 @@ #include "arrow/util/thread_pool.h" #include "arrow/util/windows_compatibility.h" +#include + +namespace rj = arrow::rapidjson; + namespace arrow { using internal::checked_cast; @@ -425,6 +431,43 @@ std::shared_ptr TableFromJSON(const std::shared_ptr& schema, return *Table::FromRecordBatches(schema, std::move(batches)); } +std::shared_ptr TensorFromJSON(const std::shared_ptr& type, + std::string_view data, std::string_view shape, + std::string_view strides, + std::string_view dim_names) { + std::shared_ptr array = ArrayFromJSON(type, data); + + rj::Document json_shape; + json_shape.Parse(shape.data(), shape.length()); + std::vector shape_vector; + for (auto& x : json_shape.GetArray()) { + shape_vector.emplace_back(x.GetInt64()); + } + rj::Document json_strides; + json_strides.Parse(strides.data(), strides.length()); + std::vector strides_vector; + for (auto& x : json_strides.GetArray()) { + strides_vector.emplace_back(x.GetInt64()); + } + rj::Document json_dim_names; + json_dim_names.Parse(dim_names.data(), dim_names.length()); + std::vector dim_names_vector; + for (auto& x : json_dim_names.GetArray()) { + dim_names_vector.emplace_back(x.GetString()); + } + return *Tensor::Make(type, array->data()->buffers[1], shape_vector, strides_vector, + dim_names_vector); +} + +std::shared_ptr TensorFromJSON(const std::shared_ptr& type, + std::string_view data, + const std::vector& shape, + const std::vector& strides, + const std::vector& dim_names) { + std::shared_ptr array = ArrayFromJSON(type, data); + return *Tensor::Make(type, array->data()->buffers[1], shape, strides, dim_names); +} + Result> RunEndEncodeTableColumns( const Table& table, const std::vector& column_indices) { const int num_columns = table.num_columns(); diff --git a/cpp/src/arrow/testing/gtest_util.h b/cpp/src/arrow/testing/gtest_util.h index 916067d85b753..85b4c1f1f0138 100644 --- a/cpp/src/arrow/testing/gtest_util.h +++ b/cpp/src/arrow/testing/gtest_util.h @@ -354,6 +354,19 @@ ARROW_TESTING_EXPORT std::shared_ptr
TableFromJSON(const std::shared_ptr&, const std::vector& json); +ARROW_TESTING_EXPORT +std::shared_ptr TensorFromJSON(const std::shared_ptr& type, + std::string_view data, std::string_view shape, + std::string_view strides = "[]", + std::string_view dim_names = "[]"); + +ARROW_TESTING_EXPORT +std::shared_ptr TensorFromJSON(const std::shared_ptr& type, + std::string_view data, + const std::vector& shape, + const std::vector& strides = {}, + const std::vector& dim_names = {}); + ARROW_TESTING_EXPORT Result> RunEndEncodeTableColumns( const Table& table, const std::vector& column_indices); diff --git a/cpp/src/arrow/testing/gtest_util_test.cc b/cpp/src/arrow/testing/gtest_util_test.cc index 14c17a972aa06..9b4514197d776 100644 --- a/cpp/src/arrow/testing/gtest_util_test.cc +++ b/cpp/src/arrow/testing/gtest_util_test.cc @@ -21,6 +21,7 @@ #include "arrow/array/builder_decimal.h" #include "arrow/datum.h" #include "arrow/record_batch.h" +#include "arrow/tensor.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" #include "arrow/type.h" @@ -134,4 +135,40 @@ TEST_F(TestAssertContainsNaN, DatumEqual) { AssertDatumsEqual(expected_chunked, actual_chunked); } +class TestTensorFromJSON : public ::testing::Test {}; + +TEST_F(TestTensorFromJSON, FromJSONAndArray) { + std::vector shape = {9, 2}; + const int64_t i64_size = sizeof(int64_t); + std::vector f_strides = {i64_size, i64_size * shape[0]}; + std::vector f_values = {1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 20, 30, 40, 50, 60, 70, 80, 90}; + auto data = Buffer::Wrap(f_values); + + std::shared_ptr tensor_expected; + ASSERT_OK_AND_ASSIGN(tensor_expected, Tensor::Make(int64(), data, shape, f_strides)); + + std::shared_ptr result = TensorFromJSON( + int64(), "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 60, 70, 80, 90]", + shape, f_strides); + + EXPECT_TRUE(tensor_expected->Equals(*result)); +} + +TEST_F(TestTensorFromJSON, FromJSON) { + std::vector shape = {9, 2}; + std::vector values = {1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 20, 30, 40, 50, 60, 70, 80, 90}; + auto data = Buffer::Wrap(values); + + std::shared_ptr tensor_expected; + ASSERT_OK_AND_ASSIGN(tensor_expected, Tensor::Make(int64(), data, shape)); + + std::shared_ptr result = TensorFromJSON( + int64(), "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 60, 70, 80, 90]", + "[9, 2]"); + + EXPECT_TRUE(tensor_expected->Equals(*result)); +} + } // namespace arrow