From 824f808962670baec727792576d3fb73b11257ea Mon Sep 17 00:00:00 2001 From: mwish Date: Tue, 26 Sep 2023 22:42:15 +0800 Subject: [PATCH] Basic impl --- cpp/src/parquet/encoding.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index 0564ea2b93f3f..044f5b6a96b5f 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -3369,6 +3369,9 @@ class DeltaByteArrayDecoderImpl : public DecoderImpl, virtual public TypedDecode if (ARROW_PREDICT_FALSE(prefix_len_ptr[i] < 0)) { throw ParquetException("negative prefix length in DELTA_BYTE_ARRAY"); } + if (buffer[i].len == 0 || prefix_len_ptr[i] == 0) { + continue; + } if (ARROW_PREDICT_FALSE(AddWithOverflow(data_size, prefix_len_ptr[i], &data_size) || AddWithOverflow(data_size, buffer[i].len, &data_size))) { throw ParquetException("excess expansion in DELTA_BYTE_ARRAY"); @@ -3382,6 +3385,14 @@ class DeltaByteArrayDecoderImpl : public DecoderImpl, virtual public TypedDecode if (ARROW_PREDICT_FALSE(static_cast<size_t>(prefix_len_ptr[i]) > prefix.length())) { throw ParquetException("prefix length too large in DELTA_BYTE_ARRAY"); } + if (prefix_len_ptr[i] == 0) { + prefix = std::string_view{buffer[i]}; + continue; + } + if (buffer[i].len == 0) { + buffer[i] = prefix; + continue; + } memcpy(data_ptr, prefix.data(), prefix_len_ptr[i]); // buffer[i] currently points to the string suffix memcpy(data_ptr + prefix_len_ptr[i], buffer[i].ptr, buffer[i].len);