From f769b1c10d9cd23bca078a34f92b03d85718f3cf Mon Sep 17 00:00:00 2001 From: mwish Date: Tue, 2 Jan 2024 13:09:39 +0800 Subject: [PATCH] [C++][Parquet] Minor: Using arrow::Buffer data_as api to replace the reinterpret_cast --- cpp/src/parquet/encoding.cc | 74 +++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 41 deletions(-) diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index 840efa12cc3c1..6553aabd60054 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -125,7 +125,7 @@ class PlainEncoder : public EncoderImpl, virtual public TypedEncoder { if (valid_bits != NULLPTR) { PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T), this->memory_pool())); - T* data = reinterpret_cast(buffer->mutable_data()); + T* data = buffer->template mutable_data_as(); int num_valid_values = ::arrow::util::internal::SpacedCompress( src, num_values, valid_bits, valid_bits_offset, data); Put(data, num_valid_values); @@ -323,7 +323,7 @@ class PlainEncoder : public EncoderImpl, virtual public BooleanEnco if (valid_bits != NULLPTR) { PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T), this->memory_pool())); - T* data = reinterpret_cast(buffer->mutable_data()); + T* data = buffer->mutable_data_as(); int num_valid_values = ::arrow::util::internal::SpacedCompress( src, num_values, valid_bits, valid_bits_offset, data); Put(data, num_valid_values); @@ -882,7 +882,7 @@ void ByteStreamSplitEncoder::PutSpaced(const T* src, int num_values, if (valid_bits != NULLPTR) { PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T), this->memory_pool())); - T* data = reinterpret_cast(buffer->mutable_data()); + T* data = buffer->template mutable_data_as(); int num_valid_values = ::arrow::util::internal::SpacedCompress( src, num_values, valid_bits, valid_bits_offset, data); Put(data, num_valid_values); @@ -1080,7 +1080,7 @@ inline int DecodePlain(const uint8_t* data, int64_t data_size ParquetException::EofException(); } for (int i = 0; i < num_values; ++i) { - out[i].ptr = data + i * type_length; + out[i].ptr = data + i * static_cast(type_length); } return static_cast(bytes_to_decode); } @@ -1537,9 +1537,8 @@ class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder { int Decode(T* buffer, int num_values) override { num_values = std::min(num_values, num_values_); - int decoded_values = - idx_decoder_.GetBatchWithDict(reinterpret_cast(dictionary_->data()), - dictionary_length_, buffer, num_values); + int decoded_values = idx_decoder_.GetBatchWithDict( + dictionary_->data_as(), dictionary_length_, buffer, num_values); if (decoded_values != num_values) { ParquetException::EofException(); } @@ -1551,9 +1550,8 @@ class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder { int64_t valid_bits_offset) override { num_values = std::min(num_values, num_values_); if (num_values != idx_decoder_.GetBatchWithDictSpaced( - reinterpret_cast(dictionary_->data()), - dictionary_length_, buffer, num_values, null_count, valid_bits, - valid_bits_offset)) { + dictionary_->data_as(), dictionary_length_, buffer, + num_values, null_count, valid_bits, valid_bits_offset)) { ParquetException::EofException(); } num_values_ -= num_values; @@ -1580,8 +1578,7 @@ class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder { num_values, /*shrink_to_fit=*/false)); } - auto indices_buffer = - reinterpret_cast(indices_scratch_space_->mutable_data()); + auto indices_buffer = indices_scratch_space_->mutable_data_as(); if (num_values != idx_decoder_.GetBatchSpaced(num_values, null_count, valid_bits, valid_bits_offset, indices_buffer)) { @@ -1611,8 +1608,7 @@ class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder { PARQUET_THROW_NOT_OK(indices_scratch_space_->TypedResize( num_values, /*shrink_to_fit=*/false)); } - auto indices_buffer = - reinterpret_cast(indices_scratch_space_->mutable_data()); + auto indices_buffer = indices_scratch_space_->mutable_data_as(); if (num_values != idx_decoder_.GetBatch(indices_buffer, num_values)) { ParquetException::EofException(); } @@ -1632,7 +1628,7 @@ class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder { void GetDictionary(const T** dictionary, int32_t* dictionary_length) override { *dictionary_length = dictionary_length_; - *dictionary = reinterpret_cast(dictionary_->mutable_data()); + *dictionary = dictionary_->mutable_data_as(); } protected: @@ -1647,8 +1643,7 @@ class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder { dictionary_length_ = static_cast(dictionary->values_left()); PARQUET_THROW_NOT_OK(dictionary_->Resize(dictionary_length_ * sizeof(T), /*shrink_to_fit=*/false)); - dictionary->Decode(reinterpret_cast(dictionary_->mutable_data()), - dictionary_length_); + dictionary->Decode(dictionary_->mutable_data_as(), dictionary_length_); } // Only one is set. @@ -1688,7 +1683,7 @@ template <> void DictDecoderImpl::SetDict(TypedDecoder* dictionary) { DecodeDict(dictionary); - auto dict_values = reinterpret_cast(dictionary_->mutable_data()); + ByteArray* dict_values = dictionary_->mutable_data_as(); int total_size = 0; for (int i = 0; i < dictionary_length_; ++i) { @@ -1702,8 +1697,7 @@ void DictDecoderImpl::SetDict(TypedDecoder* dictio int32_t offset = 0; uint8_t* bytes_data = byte_array_data_->mutable_data(); - int32_t* bytes_offsets = - reinterpret_cast(byte_array_offsets_->mutable_data()); + int32_t* bytes_offsets = byte_array_offsets_->mutable_data_as(); for (int i = 0; i < dictionary_length_; ++i) { memcpy(bytes_data + offset, dict_values[i].ptr, dict_values[i].len); bytes_offsets[i] = offset; @@ -1717,7 +1711,7 @@ template <> inline void DictDecoderImpl::SetDict(TypedDecoder* dictionary) { DecodeDict(dictionary); - auto dict_values = reinterpret_cast(dictionary_->mutable_data()); + auto dict_values = dictionary_->mutable_data_as(); int fixed_len = descr_->type_length(); int total_size = dictionary_length_ * fixed_len; @@ -1765,7 +1759,7 @@ int DictDecoderImpl::DecodeArrow( typename EncodingTraits::DictAccumulator* builder) { PARQUET_THROW_NOT_OK(builder->Reserve(num_values)); - auto dict_values = reinterpret_cast(dictionary_->data()); + auto dict_values = dictionary_->data_as(); VisitNullBitmapInline( valid_bits, valid_bits_offset, num_values, null_count, @@ -1801,7 +1795,7 @@ inline int DictDecoderImpl::DecodeArrow( PARQUET_THROW_NOT_OK(builder->Reserve(num_values)); - auto dict_values = reinterpret_cast(dictionary_->data()); + const FLBA* dict_values = dictionary_->data_as(); VisitNullBitmapInline( valid_bits, valid_bits_offset, num_values, null_count, @@ -1834,7 +1828,7 @@ int DictDecoderImpl::DecodeArrow( PARQUET_THROW_NOT_OK(builder->Reserve(num_values)); - auto dict_values = reinterpret_cast(dictionary_->data()); + const FLBA* dict_values = dictionary_->data_as(); VisitNullBitmapInline( valid_bits, valid_bits_offset, num_values, null_count, @@ -1858,7 +1852,7 @@ int DictDecoderImpl::DecodeArrow( PARQUET_THROW_NOT_OK(builder->Reserve(num_values)); using value_type = typename Type::c_type; - auto dict_values = reinterpret_cast(dictionary_->data()); + const auto* dict_values = dictionary_->data_as(); VisitNullBitmapInline( valid_bits, valid_bits_offset, num_values, null_count, @@ -1936,7 +1930,7 @@ class DictByteArrayDecoderImpl : public DictDecoderImpl, // space for binary data. RETURN_NOT_OK(helper.Prepare()); - auto dict_values = reinterpret_cast(dictionary_->data()); + const ByteArray* dict_values = dictionary_->data_as(); int values_decoded = 0; int num_indices = 0; int pos_indices = 0; @@ -2007,7 +2001,7 @@ class DictByteArrayDecoderImpl : public DictDecoderImpl, // space for binary data. RETURN_NOT_OK(helper.Prepare()); - auto dict_values = reinterpret_cast(dictionary_->data()); + const auto* dict_values = dictionary_->data_as(); while (values_decoded < num_values) { const int32_t batch_size = @@ -2037,7 +2031,7 @@ class DictByteArrayDecoderImpl : public DictDecoderImpl, RETURN_NOT_OK(builder->Reserve(num_values)); ::arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, num_values); - auto dict_values = reinterpret_cast(dictionary_->data()); + const auto* dict_values = dictionary_->data_as(); int values_decoded = 0; int num_appended = 0; @@ -2090,7 +2084,7 @@ class DictByteArrayDecoderImpl : public DictDecoderImpl, RETURN_NOT_OK(builder->Reserve(num_values)); - auto dict_values = reinterpret_cast(dictionary_->data()); + const auto* dict_values = dictionary_->data_as(); int values_decoded = 0; while (values_decoded < num_values) { @@ -2388,7 +2382,7 @@ void DeltaBitPackEncoder::PutSpaced(const T* src, int num_values, if (valid_bits != NULLPTR) { PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T), this->memory_pool())); - T* data = reinterpret_cast(buffer->mutable_data()); + T* data = buffer->template mutable_data_as(); int num_valid_values = ::arrow::util::internal::SpacedCompress( src, num_values, valid_bits, valid_bits_offset, data); Put(data, num_valid_values); @@ -2734,7 +2728,7 @@ void DeltaLengthByteArrayEncoder::PutSpaced(const T* src, int num_values, if (valid_bits != NULLPTR) { PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T), this->memory_pool())); - T* data = reinterpret_cast(buffer->mutable_data()); + T* data = buffer->template mutable_data_as(); int num_valid_values = ::arrow::util::internal::SpacedCompress( src, num_values, valid_bits, valid_bits_offset, data); Put(data, num_valid_values); @@ -2789,8 +2783,7 @@ class DeltaLengthByteArrayDecoder : public DecoderImpl, } int32_t data_size = 0; - const int32_t* length_ptr = - reinterpret_cast(buffered_length_->data()) + length_idx_; + const int32_t* length_ptr = buffered_length_->data_as() + length_idx_; int bytes_offset = len_ - decoder_->bytes_left(); for (int i = 0; i < max_values; ++i) { int32_t len = length_ptr[i]; @@ -2844,8 +2837,8 @@ class DeltaLengthByteArrayDecoder : public DecoderImpl, // call len_decoder_.Decode to decode all the lengths. // all the lengths are buffered in buffered_length_. - int ret = len_decoder_.Decode( - reinterpret_cast(buffered_length_->mutable_data()), num_length); + int ret = + len_decoder_.Decode(buffered_length_->mutable_data_as(), num_length); DCHECK_EQ(ret, num_length); length_idx_ = 0; num_valid_values_ = num_length; @@ -2938,7 +2931,7 @@ class RleBooleanEncoder final : public EncoderImpl, virtual public BooleanEncode if (valid_bits != NULLPTR) { PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T), this->memory_pool())); - T* data = reinterpret_cast(buffer->mutable_data()); + T* data = buffer->mutable_data_as(); int num_valid_values = ::arrow::util::internal::SpacedCompress( src, num_values, valid_bits, valid_bits_offset, data); Put(data, num_valid_values); @@ -3136,7 +3129,7 @@ class DeltaByteArrayEncoder : public EncoderImpl, virtual public TypedEncoder
Resize(num_values * sizeof(T), false)); } - T* data = reinterpret_cast(buffer_->mutable_data()); + T* data = buffer_->mutable_data_as(); int num_valid_values = ::arrow::util::internal::SpacedCompress( src, num_values, valid_bits, valid_bits_offset, data); Put(data, num_valid_values); @@ -3338,7 +3331,7 @@ class DeltaByteArrayDecoderImpl : public DecoderImpl, virtual public TypedDecode // all the prefix lengths are buffered in buffered_prefix_length_. PARQUET_THROW_NOT_OK(buffered_prefix_length_->Resize(num_prefix * sizeof(int32_t))); int ret = prefix_len_decoder_.Decode( - reinterpret_cast(buffered_prefix_length_->mutable_data()), num_prefix); + buffered_prefix_length_->mutable_data_as(), num_prefix); DCHECK_EQ(ret, num_prefix); prefix_len_offset_ = 0; num_valid_values_ = num_prefix; @@ -3425,8 +3418,7 @@ class DeltaByteArrayDecoderImpl : public DecoderImpl, virtual public TypedDecode int64_t data_size = 0; const int32_t* prefix_len_ptr = - reinterpret_cast(buffered_prefix_length_->data()) + - prefix_len_offset_; + buffered_prefix_length_->data_as() + prefix_len_offset_; for (int i = 0; i < max_values; ++i) { if (prefix_len_ptr[i] == 0) { // We don't need to copy the suffix if the prefix length is 0. @@ -3578,7 +3570,7 @@ class ByteStreamSplitDecoder : public DecoderImpl, virtual public TypedDecodersize() < size) { PARQUET_ASSIGN_OR_THROW(decode_buffer_, ::arrow::AllocateBuffer(size)); } - return reinterpret_cast(decode_buffer_->mutable_data()); + return decode_buffer_->mutable_data_as(); } private: