diff --git a/cpp/src/arrow/array/array_binary.cc b/cpp/src/arrow/array/array_binary.cc index 19a653df00f8c..d83ba0ca8936d 100644 --- a/cpp/src/arrow/array/array_binary.cc +++ b/cpp/src/arrow/array/array_binary.cc @@ -107,7 +107,7 @@ BinaryViewArray::BinaryViewArray(std::shared_ptr type, int64_t length, std::string_view BinaryViewArray::GetView(int64_t i) const { const std::shared_ptr* data_buffers = data_->buffers.data() + 2; - return util::FromIndexOffsetBinaryView(raw_values_[i], data_buffers); + return util::FromBinaryView(raw_values_[i], data_buffers); } StringViewArray::StringViewArray(std::shared_ptr data) { diff --git a/cpp/src/arrow/array/array_binary_test.cc b/cpp/src/arrow/array/array_binary_test.cc index e6b1ed339a27e..04391be0ac789 100644 --- a/cpp/src/arrow/array/array_binary_test.cc +++ b/cpp/src/arrow/array/array_binary_test.cc @@ -405,41 +405,39 @@ TEST(StringViewArray, Validate) { // non-inline views are expected to reference only buffers managed by the array EXPECT_THAT( - MakeBinaryViewArray({buffer_s, buffer_y}, - {util::ToIndexOffsetBinaryView( - "supe", static_cast(buffer_s->size()), 0, 0), - util::ToIndexOffsetBinaryView( - "yyyy", static_cast(buffer_y->size()), 1, 0)}), + MakeBinaryViewArray( + {buffer_s, buffer_y}, + {util::ToBinaryView("supe", static_cast(buffer_s->size()), 0, 0), + util::ToBinaryView("yyyy", static_cast(buffer_y->size()), 1, 0)}), Ok()); // views may not reference data buffers not present in the array EXPECT_THAT( - MakeBinaryViewArray({}, {util::ToIndexOffsetBinaryView( - "supe", static_cast(buffer_s->size()), 0, 0)}), + MakeBinaryViewArray( + {}, {util::ToBinaryView("supe", static_cast(buffer_s->size()), 0, 0)}), Raises(StatusCode::IndexError)); // ... or ranges which overflow the referenced data buffer EXPECT_THAT( MakeBinaryViewArray( - {buffer_s}, {util::ToIndexOffsetBinaryView( + {buffer_s}, {util::ToBinaryView( "supe", static_cast(buffer_s->size() + 50), 0, 0)}), Raises(StatusCode::IndexError)); // Additionally, the prefixes of non-inline views must match the data buffer EXPECT_THAT( - MakeBinaryViewArray({buffer_s, buffer_y}, - {util::ToIndexOffsetBinaryView( - "SUPE", static_cast(buffer_s->size()), 0, 0), - util::ToIndexOffsetBinaryView( - "yyyy", static_cast(buffer_y->size()), 1, 0)}), + MakeBinaryViewArray( + {buffer_s, buffer_y}, + {util::ToBinaryView("SUPE", static_cast(buffer_s->size()), 0, 0), + util::ToBinaryView("yyyy", static_cast(buffer_y->size()), 1, 0)}), Raises(StatusCode::Invalid)); // Invalid string views which are masked by a null bit do not cause validation to fail auto invalid_but_masked = - MakeBinaryViewArray({buffer_s}, - {util::ToIndexOffsetBinaryView( - "SUPE", static_cast(buffer_s->size()), 0, 0), - util::ToIndexOffsetBinaryView("yyyy", 50, 40, 30)}, - /*validate=*/false) + MakeBinaryViewArray( + {buffer_s}, + {util::ToBinaryView("SUPE", static_cast(buffer_s->size()), 0, 0), + util::ToBinaryView("yyyy", 50, 40, 30)}, + /*validate=*/false) .ValueOrDie() ->data(); invalid_but_masked->null_count = 2; @@ -447,19 +445,19 @@ TEST(StringViewArray, Validate) { EXPECT_THAT(internal::ValidateArrayFull(*invalid_but_masked), Ok()); // overlapping views are allowed - EXPECT_THAT(MakeBinaryViewArray( - {buffer_s}, - { - util::ToIndexOffsetBinaryView( - "supe", static_cast(buffer_s->size()), 0, 0), - util::ToIndexOffsetBinaryView( - "uper", static_cast(buffer_s->size() - 1), 0, 1), - util::ToIndexOffsetBinaryView( - "perc", static_cast(buffer_s->size() - 2), 0, 2), - util::ToIndexOffsetBinaryView( - "erca", static_cast(buffer_s->size() - 3), 0, 3), - }), - Ok()); + EXPECT_THAT( + MakeBinaryViewArray( + {buffer_s}, + { + util::ToBinaryView("supe", static_cast(buffer_s->size()), 0, 0), + util::ToBinaryView("uper", static_cast(buffer_s->size() - 1), 0, + 1), + util::ToBinaryView("perc", static_cast(buffer_s->size() - 2), 0, + 2), + util::ToBinaryView("erca", static_cast(buffer_s->size() - 3), 0, + 3), + }), + Ok()); } template @@ -982,13 +980,7 @@ class TestBaseBinaryDataVisitor : public ::testing::Test { public: using TypeClass = T; - void SetUp() override { - if constexpr (is_binary_view_like_type::value) { - type_ = TypeClass::is_utf8 ? utf8_view() : binary_view(); - } else { - type_ = TypeTraits::type_singleton(); - } - } + void SetUp() override { type_ = TypeTraits::type_singleton(); } void TestBasics() { auto array = ArrayFromJSON( diff --git a/cpp/src/arrow/array/builder_binary.cc b/cpp/src/arrow/array/builder_binary.cc index 932277a4f374b..3ff22d4a3feeb 100644 --- a/cpp/src/arrow/array/builder_binary.cc +++ b/cpp/src/arrow/array/builder_binary.cc @@ -35,6 +35,7 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" #include "arrow/util/logging.h" +#include "arrow/visit_data_inline.h" namespace arrow { @@ -51,12 +52,17 @@ Status BinaryViewBuilder::AppendArraySlice(const ArraySpan& array, int64_t offse auto bitmap = array.GetValues(0, 0); auto values = array.GetValues(1) + offset; - int64_t out_of_line_total = 0; - for (int64_t i = 0; i < length; i++) { - if (!values[i].is_inline()) { - out_of_line_total += static_cast(values[i].size()); - } - } + int64_t out_of_line_total = 0, i = 0; + VisitNullBitmapInline( + array.buffers[0].data, array.offset, array.length, array.null_count, + [&] { + if (!values[i].is_inline()) { + out_of_line_total += static_cast(values[i].size()); + } + ++i; + }, + [&] { ++i; }); + RETURN_NOT_OK(Reserve(length)); RETURN_NOT_OK(ReserveData(out_of_line_total)); @@ -66,8 +72,7 @@ Status BinaryViewBuilder::AppendArraySlice(const ArraySpan& array, int64_t offse continue; } - UnsafeAppend( - util::FromIndexOffsetBinaryView(values[i], array.GetVariadicBuffers().data())); + UnsafeAppend(util::FromBinaryView(values[i], array.GetVariadicBuffers().data())); } return Status::OK(); } diff --git a/cpp/src/arrow/array/builder_binary.h b/cpp/src/arrow/array/builder_binary.h index 1da9ed88fe3ba..f5b335b87a26d 100644 --- a/cpp/src/arrow/array/builder_binary.h +++ b/cpp/src/arrow/array/builder_binary.h @@ -500,9 +500,9 @@ class ARROW_EXPORT StringHeapBuilder { ARROW_RETURN_NOT_OK(Reserve(length)); } - auto v = util::ToIndexOffsetBinaryView(value, static_cast(length), - static_cast(blocks_.size() - 1), - current_offset_); + auto v = + util::ToBinaryView(value, static_cast(length), + static_cast(blocks_.size() - 1), current_offset_); memcpy(current_out_buffer_, value, static_cast(length)); current_out_buffer_ += length; diff --git a/cpp/src/arrow/array/concatenate.cc b/cpp/src/arrow/array/concatenate.cc index f8c5762910b90..682e4945a027f 100644 --- a/cpp/src/arrow/array/concatenate.cc +++ b/cpp/src/arrow/array/concatenate.cc @@ -242,7 +242,7 @@ class ConcatenateImpl { ARROW_ASSIGN_OR_RAISE(auto view_buffers, Buffers(1, BinaryViewType::kSize)); ARROW_ASSIGN_OR_RAISE(auto view_buffer, ConcatenateBuffers(view_buffers, pool_)); - auto* s = view_buffer->mutable_data_as(); + auto* views = view_buffer->mutable_data_as(); size_t preceding_buffer_count = 0; int64_t i = in_[0]->length; @@ -250,8 +250,9 @@ class ConcatenateImpl { preceding_buffer_count += in_[in_index - 1]->buffers.size() - 2; for (int64_t end_i = i + in_[in_index]->length; i < end_i; ++i) { - if (s[i].is_inline()) continue; - s[i].ref.buffer_index += static_cast(preceding_buffer_count); + if (views[i].is_inline()) continue; + views[i].ref.buffer_index = SafeSignedAdd( + views[i].ref.buffer_index, static_cast(preceding_buffer_count)); } } diff --git a/cpp/src/arrow/array/concatenate_test.cc b/cpp/src/arrow/array/concatenate_test.cc index 55535f2c15433..e49f65e15e15d 100644 --- a/cpp/src/arrow/array/concatenate_test.cc +++ b/cpp/src/arrow/array/concatenate_test.cc @@ -99,8 +99,8 @@ class ConcatenateTest : public ::testing::Test { for (auto slice : slices) { ASSERT_OK(slice->ValidateFull()); } - ASSERT_OK(expected->ValidateFull()); ASSERT_OK_AND_ASSIGN(auto actual, Concatenate(slices)); + ASSERT_OK(actual->ValidateFull()); AssertArraysEqual(*expected, *actual); if (actual->data()->buffers[0]) { CheckTrailingBitsAreZeroed(actual->data()->buffers[0], actual->length()); @@ -163,7 +163,7 @@ TEST_F(ConcatenateTest, StringType) { TEST_F(ConcatenateTest, StringViewType) { Check([this](int32_t size, double null_probability, std::shared_ptr* out) { - *out = rng_.StringView(size, /*min_length =*/0, /*max_length =*/15, null_probability); + *out = rng_.StringView(size, /*min_length =*/0, /*max_length =*/40, null_probability); ASSERT_OK((**out).ValidateFull()); }); } diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index a59da54f0fe13..678513fd4d151 100644 --- a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -93,7 +93,7 @@ bool RunEndEncodedMayHaveLogicalNulls(const ArrayData& data) { return ArraySpan(data).MayHaveLogicalNulls(); } -BufferSpan PackVariadicBuffers(util::span const> buffers) { +BufferSpan PackVariadicBuffers(util::span> buffers) { return {const_cast(reinterpret_cast(buffers.data())), static_cast(buffers.size() * sizeof(std::shared_ptr))}; } @@ -372,7 +372,7 @@ void ArraySpan::FillFromScalar(const Scalar& value) { static_assert(sizeof(BinaryViewType::c_type) <= sizeof(scalar.scratch_space_)); auto* view = new (&scalar.scratch_space_) BinaryViewType::c_type; if (scalar.is_valid) { - *view = util::ToIndexOffsetBinaryView(std::string_view{*scalar.value}, 0, 0); + *view = util::ToBinaryView(std::string_view{*scalar.value}, 0, 0); this->buffers[2] = internal::PackVariadicBuffers({&scalar.value, 1}); } else { *view = {}; @@ -565,7 +565,7 @@ std::shared_ptr ArraySpan::ToArrayData() const { return result; } -util::span const> ArraySpan::GetVariadicBuffers() const { +util::span> ArraySpan::GetVariadicBuffers() const { DCHECK(HasVariadicBuffers()); return {buffers[2].data_as>(), buffers[2].size / sizeof(std::shared_ptr)}; diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h index c54068986359d..40a77640cd1e5 100644 --- a/cpp/src/arrow/array/data.h +++ b/cpp/src/arrow/array/data.h @@ -540,7 +540,7 @@ struct ARROW_EXPORT ArraySpan { /// sizeof(shared_ptr). /// /// \see HasVariadicBuffers - util::span const> GetVariadicBuffers() const; + util::span> GetVariadicBuffers() const; bool HasVariadicBuffers() const; private: diff --git a/cpp/src/arrow/array/util.cc b/cpp/src/arrow/array/util.cc index 369cf1a652dae..9ea2fc2b6f0a1 100644 --- a/cpp/src/arrow/array/util.cc +++ b/cpp/src/arrow/array/util.cc @@ -680,7 +680,7 @@ class RepeatedArrayFactory { template enable_if_binary_view_like Visit(const T& type) { std::string_view value{*scalar().value}; - auto s = util::ToIndexOffsetBinaryView(value, 0, 0); + auto s = util::ToBinaryView(value, 0, 0); RETURN_NOT_OK(FinishFixedWidth(&s, sizeof(s))); if (!s.is_inline()) { out_->data()->buffers.push_back(scalar().value); diff --git a/cpp/src/arrow/array/util.h b/cpp/src/arrow/array/util.h index a340caca03074..9f34af0525d96 100644 --- a/cpp/src/arrow/array/util.h +++ b/cpp/src/arrow/array/util.h @@ -86,18 +86,5 @@ Result> SwapEndianArrayData( ARROW_EXPORT std::vector RechunkArraysConsistently(const std::vector&); -/// Convert between index/offset and raw pointer views. -/// -/// This function can be used to overwrite a buffer of views if desired, -/// IE it is supported for `in.buffers[1].data == out`. -/// -/// Note that calling this function is not necessary if all views happen to be -/// Inline; this is usually efficiently detectable by checking for an absence of any -/// data buffers. -/// -/// Will raise IndexError if a view refers to memory outside the data buffers. -ARROW_EXPORT -Status SwapBinaryViewPointers(const ArraySpan& in, BinaryViewType::c_type* out); - } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/array/validate.cc b/cpp/src/arrow/array/validate.cc index 142bab151744a..3dde41b1450e8 100644 --- a/cpp/src/arrow/array/validate.cc +++ b/cpp/src/arrow/array/validate.cc @@ -650,7 +650,18 @@ struct ValidateArrayImpl { views[i].size()); } - if (views[i].is_inline()) continue; + if (views[i].is_inline()) { + auto padding_bytes = util::span(views[i].inlined.data).subspan(views[i].size()); + for (auto padding_byte : padding_bytes) { + if (padding_byte != 0) { + return Status::Invalid("View at slot ", i, " was inline with size ", + views[i].size(), + " but its padding bytes were not all zero: ", + HexEncode(padding_bytes.data(), padding_bytes.size())); + } + } + continue; + } auto [size, prefix, buffer_index, offset] = views[i].ref; diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc index 409b21c90b6d1..50cfdd05a14bb 100644 --- a/cpp/src/arrow/compare.cc +++ b/cpp/src/arrow/compare.cc @@ -271,8 +271,8 @@ class RangeDataEqualsImpl { auto* right_buffers = right_.buffers.data() + 2; VisitValidRuns([&](int64_t i, int64_t length) { for (auto end_i = i + length; i < end_i; ++i) { - if (!util::EqualIndexOffsetBinaryView(left_values[i], right_values[i], - left_buffers, right_buffers)) { + if (!util::EqualBinaryView(left_values[i], right_values[i], left_buffers, + right_buffers)) { return false; } } diff --git a/cpp/src/arrow/integration/json_internal.cc b/cpp/src/arrow/integration/json_internal.cc index 1f19e891f425c..c4f44d21bed38 100644 --- a/cpp/src/arrow/integration/json_internal.cc +++ b/cpp/src/arrow/integration/json_internal.cc @@ -107,9 +107,11 @@ std::string GetTimeUnitName(TimeUnit::type unit) { return "UNKNOWN"; } -std::string_view GetStringView(const rj::Value& str) { - DCHECK(str.IsString()); - return {str.GetString(), str.GetStringLength()}; +Result GetStringView(const rj::Value& str) { + if (!str.IsString()) { + return Status::Invalid("field was not a string line ", __LINE__); + } + return std::string_view{str.GetString(), str.GetStringLength()}; } class SchemaWriter { @@ -1413,7 +1415,7 @@ class ArrayReader { BufferVector buffers; buffers.resize(json_variadic_bufs.Size() + 2); for (auto [json_buf, buf] : Zip(json_variadic_bufs, span{buffers}.subspan(2))) { - auto hex_string = GetStringView(json_buf); + ARROW_ASSIGN_OR_RAISE(auto hex_string, GetStringView(json_buf)); ARROW_ASSIGN_OR_RAISE( buf, AllocateBuffer(static_cast(hex_string.size()) / 2, pool_)); RETURN_NOT_OK(ParseHexValues(hex_string, buf->mutable_data())); @@ -1433,9 +1435,9 @@ class ArrayReader { static_cast(length_)}; int64_t null_count = 0; - for (auto [json_view, s, is_valid] : Zip(json_views, views, is_valid_)) { + for (auto [json_view, out_view, is_valid] : Zip(json_views, views, is_valid_)) { if (!is_valid) { - s = {}; + out_view = {}; ++null_count; continue; } @@ -1451,16 +1453,16 @@ class ArrayReader { if (size <= BinaryViewType::kInlineSize) { auto json_inlined = json_view_obj.FindMember("INLINED"); RETURN_NOT_STRING("INLINED", json_inlined, json_view_obj); - s.inlined = {size, {}}; + out_view.inlined = {size, {}}; if constexpr (ViewType::is_utf8) { DCHECK_LE(json_inlined->value.GetStringLength(), BinaryViewType::kInlineSize); - memcpy(&s.inlined.data, json_inlined->value.GetString(), size); + memcpy(&out_view.inlined.data, json_inlined->value.GetString(), size); } else { DCHECK_LE(json_inlined->value.GetStringLength(), BinaryViewType::kInlineSize * 2); - RETURN_NOT_OK( - ParseHexValues(GetStringView(json_inlined->value), s.inlined.data.data())); + ARROW_ASSIGN_OR_RAISE(auto inlined, GetStringView(json_inlined->value)); + RETURN_NOT_OK(ParseHexValues(inlined, out_view.inlined.data.data())); } continue; } @@ -1472,7 +1474,7 @@ class ArrayReader { RETURN_NOT_INT("BUFFER_INDEX", json_buffer_index, json_view_obj); RETURN_NOT_INT("OFFSET", json_offset, json_view_obj); - s.ref = { + out_view.ref = { size, {}, static_cast(json_buffer_index->value.GetInt64()), @@ -1480,12 +1482,12 @@ class ArrayReader { }; DCHECK_EQ(json_prefix->value.GetStringLength(), BinaryViewType::kPrefixSize * 2); - RETURN_NOT_OK( - ParseHexValues(GetStringView(json_prefix->value), s.ref.prefix.data())); + ARROW_ASSIGN_OR_RAISE(auto prefix, GetStringView(json_prefix->value)); + RETURN_NOT_OK(ParseHexValues(prefix, out_view.ref.prefix.data())); - DCHECK_LE(static_cast(s.ref.buffer_index), buffers.size() - 2); - DCHECK_LE(static_cast(s.ref.offset) + s.size(), - buffers[s.ref.buffer_index + 2]->size()); + DCHECK_LE(static_cast(out_view.ref.buffer_index), buffers.size() - 2); + DCHECK_LE(static_cast(out_view.ref.offset) + out_view.size(), + buffers[out_view.ref.buffer_index + 2]->size()); } data_ = ArrayData::Make(type_, length_, std::move(buffers), null_count); diff --git a/cpp/src/arrow/ipc/metadata_internal.cc b/cpp/src/arrow/ipc/metadata_internal.cc index 9350fb4ac6096..ab1a58dd1df8b 100644 --- a/cpp/src/arrow/ipc/metadata_internal.cc +++ b/cpp/src/arrow/ipc/metadata_internal.cc @@ -996,7 +996,10 @@ static Status MakeRecordBatch(FBB& fbb, int64_t length, int64_t body_length, BodyCompressionOffset fb_compression; RETURN_NOT_OK(GetBodyCompression(fbb, options, &fb_compression)); - auto fb_variadic_buffer_counts = fbb.CreateVector(variadic_buffer_counts); + flatbuffers::Offset> fb_variadic_buffer_counts{}; + if (!variadic_buffer_counts.empty()) { + fb_variadic_buffer_counts = fbb.CreateVector(variadic_buffer_counts); + } *offset = flatbuf::CreateRecordBatch(fbb, length, fb_nodes, fb_buffers, fb_compression, fb_variadic_buffer_counts); diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index 1bd14ac120cba..d603062d81d4a 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -59,7 +59,6 @@ #include "arrow/util/thread_pool.h" #include "arrow/util/ubsan.h" #include "arrow/util/vector.h" -#include "arrow/visit_data_inline.h" #include "arrow/visit_type_inline.h" #include "generated/File_generated.h" // IWYU pragma: export diff --git a/cpp/src/arrow/testing/random.h b/cpp/src/arrow/testing/random.h index 1cf57cdb4e939..cbdac3baa0109 100644 --- a/cpp/src/arrow/testing/random.h +++ b/cpp/src/arrow/testing/random.h @@ -374,10 +374,11 @@ class ARROW_TESTING_EXPORT RandomArrayGenerator { /// determined by the uniform distribution /// \param[in] max_length the upper bound of the string length /// determined by the uniform distribution + /// \param[in] null_probability the probability of a value being null /// \param[in] max_data_buffer_length the data buffer size at which /// a new chunk will be generated /// \param[in] alignment alignment for memory allocations (in bytes) - /// \param[in] null_probability the probability of a value being null + /// \param[in] memory_pool memory pool to allocate memory from /// /// \return a generated Array std::shared_ptr StringView(int64_t size, int32_t min_length, int32_t max_length, diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index 5feeaf84d95e1..f378bd974047d 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -256,11 +256,7 @@ struct PhysicalTypeVisitor { template Status Visit(const Type& type) { - if constexpr (std::is_base_of_v) { - result = binary_view(); - } else { - result = TypeTraits::type_singleton(); - } + result = TypeTraits::type_singleton(); return Status::OK(); } }; diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index 898e6465ef299..738aee08a3b65 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -347,7 +347,8 @@ struct TypeTraits { using BuilderType = BinaryViewBuilder; using ScalarType = BinaryViewScalar; using CType = BinaryViewType::c_type; - constexpr static bool is_parameter_free = false; + constexpr static bool is_parameter_free = true; + static inline std::shared_ptr type_singleton() { return binary_view(); } }; template <> @@ -386,7 +387,8 @@ struct TypeTraits { using BuilderType = StringViewBuilder; using ScalarType = StringViewScalar; using CType = BinaryViewType::c_type; - constexpr static bool is_parameter_free = false; + constexpr static bool is_parameter_free = true; + static inline std::shared_ptr type_singleton() { return utf8_view(); } }; template <> diff --git a/cpp/src/arrow/util/assume_aligned.h b/cpp/src/arrow/util/assume_aligned.h deleted file mode 100644 index dfcbb0e6289e3..0000000000000 --- a/cpp/src/arrow/util/assume_aligned.h +++ /dev/null @@ -1,45 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include - -namespace arrow::util { - -template -[[nodiscard]] constexpr P* AssumeAligned(P* ptr) { -#if defined(__has_builtin) -#if __has_builtin(__builtin_assume_aligned) -#define ARROW_HAS_BUILTIN_ASSUME_ALIGNED -#endif -#endif - -#if defined(ARROW_HAS_BUILTIN_ASSUME_ALIGNED) -#undef ARROW_HAS_BUILTIN_ASSUME_ALIGNED - return static_cast(__builtin_assume_aligned(ptr, N)); -#else - return ptr; -#endif -} - -template -[[nodiscard]] constexpr P* AssumeAlignedAs(P* ptr) { - return AssumeAligned(ptr); -} - -} // namespace arrow::util diff --git a/cpp/src/arrow/util/binary_view_util.h b/cpp/src/arrow/util/binary_view_util.h index e14da5d7e138c..13f05e0e7c808 100644 --- a/cpp/src/arrow/util/binary_view_util.h +++ b/cpp/src/arrow/util/binary_view_util.h @@ -21,7 +21,6 @@ #include #include "arrow/type.h" -#include "arrow/util/assume_aligned.h" #include "arrow/util/span.h" namespace arrow::util { @@ -38,9 +37,8 @@ inline BinaryViewType::c_type ToInlineBinaryView(std::string_view v) { return ToInlineBinaryView(v.data(), static_cast(v.size())); } -inline BinaryViewType::c_type ToIndexOffsetBinaryView(const void* data, int32_t size, - int32_t buffer_index, - int32_t offset) { +inline BinaryViewType::c_type ToBinaryView(const void* data, int32_t size, + int32_t buffer_index, int32_t offset) { if (size <= BinaryViewType::kInlineSize) { return ToInlineBinaryView(data, size); } @@ -52,27 +50,24 @@ inline BinaryViewType::c_type ToIndexOffsetBinaryView(const void* data, int32_t return out; } -inline BinaryViewType::c_type ToIndexOffsetBinaryView(std::string_view v, - int32_t buffer_index, - int32_t offset) { - return ToIndexOffsetBinaryView(v.data(), static_cast(v.size()), buffer_index, - offset); +inline BinaryViewType::c_type ToBinaryView(std::string_view v, int32_t buffer_index, + int32_t offset) { + return ToBinaryView(v.data(), static_cast(v.size()), buffer_index, offset); } template -std::string_view FromIndexOffsetBinaryView(const BinaryViewType::c_type& v, - const BufferPtr* data_buffers) { +std::string_view FromBinaryView(const BinaryViewType::c_type& v, + const BufferPtr* data_buffers) { auto* data = v.is_inline() ? v.inlined.data.data() : data_buffers[v.ref.buffer_index]->data() + v.ref.offset; return {reinterpret_cast(data), static_cast(v.size())}; } template -std::string_view FromIndexOffsetBinaryView(BinaryViewType::c_type&&, - const BufferPtr*) = delete; +std::string_view FromBinaryView(BinaryViewType::c_type&&, const BufferPtr*) = delete; -template -bool EqualIndexOffsetBinaryView(BinaryViewType::c_type l, BinaryViewType::c_type r, - const BufferPtr*... buffers) { +template +bool EqualBinaryView(BinaryViewType::c_type l, BinaryViewType::c_type r, + const BufferPtr* l_buffers, const BufferPtr* r_buffers) { int64_t l_size_and_prefix, r_size_and_prefix; memcpy(&l_size_and_prefix, &l, sizeof(l_size_and_prefix)); memcpy(&r_size_and_prefix, &r, sizeof(r_size_and_prefix)); @@ -83,18 +78,13 @@ bool EqualIndexOffsetBinaryView(BinaryViewType::c_type l, BinaryViewType::c_type // The inline part is zeroed at construction, so we can compare // a word at a time if data extends past 'prefix_'. int64_t l_inlined, r_inlined; - memcpy(&l_inlined, - AssumeAlignedAs(l.inline_data() + BinaryViewType::kPrefixSize), - sizeof(l_size_and_prefix)); - memcpy(&r_inlined, - AssumeAlignedAs(r.inline_data() + BinaryViewType::kPrefixSize), - sizeof(r_size_and_prefix)); + memcpy(&l_inlined, l.inline_data() + BinaryViewType::kPrefixSize, sizeof(l_inlined)); + memcpy(&r_inlined, r.inline_data() + BinaryViewType::kPrefixSize, sizeof(r_inlined)); return l_inlined == r_inlined; } // Sizes are equal and this is not inline, therefore both are out // of line and have kPrefixSize first in common. - auto [l_buffers, r_buffers] = std::pair{buffers...}; const uint8_t* l_data = l_buffers[l.ref.buffer_index]->data() + l.ref.offset; const uint8_t* r_data = r_buffers[r.ref.buffer_index]->data() + r.ref.offset; return memcmp(l_data + BinaryViewType::kPrefixSize, diff --git a/cpp/src/arrow/visit_data_inline.h b/cpp/src/arrow/visit_data_inline.h index dcfb48dc477fd..874cf5b514452 100644 --- a/cpp/src/arrow/visit_data_inline.h +++ b/cpp/src/arrow/visit_data_inline.h @@ -161,7 +161,7 @@ struct ArraySpanInlineVisitor> { return VisitBitBlocks( arr.buffers[0].data, arr.offset, arr.length, [&](int64_t index) { - return valid_func(util::FromIndexOffsetBinaryView(s[index], data_buffers)); + return valid_func(util::FromBinaryView(s[index], data_buffers)); }, [&]() { return null_func(); }); } @@ -177,7 +177,7 @@ struct ArraySpanInlineVisitor> { VisitBitBlocksVoid( arr.buffers[0].data, arr.offset, arr.length, [&](int64_t index) { - valid_func(util::FromIndexOffsetBinaryView(s[index], data_buffers)); + valid_func(util::FromBinaryView(s[index], data_buffers)); }, std::forward(null_func)); }