diff --git a/cpp/src/parquet/bloom_filter_reader.cc b/cpp/src/parquet/bloom_filter_reader.cc index 49c229029356b..3518d2ba1eb76 100644 --- a/cpp/src/parquet/bloom_filter_reader.cc +++ b/cpp/src/parquet/bloom_filter_reader.cc @@ -64,10 +64,14 @@ std::unique_ptr RowGroupBloomFilterReaderImpl::GetColumnBloomFilter throw ParquetException("file size less or equal than bloom offset"); } std::optional bloom_filter_length = col_chunk->bloom_filter_length(); - if (bloom_filter_length.has_value() && - *bloom_filter_length + *bloom_filter_offset > file_size) { - throw ParquetException( - "bloom filter length + bloom filter offset greater than file size"); + if (bloom_filter_length.has_value()) { + if (*bloom_filter_length < 0) { + throw ParquetException("bloom_filter_length less than 0"); + } + if (*bloom_filter_length + *bloom_filter_offset > file_size) { + throw ParquetException( + "bloom filter length + bloom filter offset greater than file size"); + } } auto stream = ::arrow::io::RandomAccessFile::GetStream( input_, *bloom_filter_offset, file_size - *bloom_filter_offset); diff --git a/cpp/src/parquet/bloom_filter_test.cc b/cpp/src/parquet/bloom_filter_test.cc index ca5e06d72c1f7..ff83b97302274 100644 --- a/cpp/src/parquet/bloom_filter_test.cc +++ b/cpp/src/parquet/bloom_filter_test.cc @@ -107,24 +107,26 @@ TEST(BasicTest, TestBloomFilter) { // Deserialize Bloom filter from memory ASSERT_OK_AND_ASSIGN(auto buffer, sink->Finish()); - ::arrow::io::BufferReader source(buffer); ReaderProperties reader_properties; - BlockSplitBloomFilter de_bloom = - BlockSplitBloomFilter::Deserialize(reader_properties, &source, std::nullopt); - - // Lookup previously inserted values - for (const auto v : kIntInserts) { - EXPECT_TRUE(de_bloom.FindHash(de_bloom.Hash(v))); - } - for (const auto v : kFloatInserts) { - EXPECT_TRUE(de_bloom.FindHash(de_bloom.Hash(v))); + for (std::optional bloom_filter_length : + std::vector>{std::nullopt, buffer->size()}) { + ::arrow::io::BufferReader source(buffer); + BlockSplitBloomFilter de_bloom = BlockSplitBloomFilter::Deserialize( + reader_properties, &source, bloom_filter_length); + // Lookup previously inserted values + for (const auto v : kIntInserts) { + EXPECT_TRUE(de_bloom.FindHash(de_bloom.Hash(v))); + } + for (const auto v : kFloatInserts) { + EXPECT_TRUE(de_bloom.FindHash(de_bloom.Hash(v))); + } + false_positives = 0; + for (const auto v : kNegativeIntLookups) { + false_positives += de_bloom.FindHash(de_bloom.Hash(v)); + } + EXPECT_LE(false_positives, 2); } - false_positives = 0; - for (const auto v : kNegativeIntLookups) { - false_positives += de_bloom.FindHash(de_bloom.Hash(v)); - } - EXPECT_LE(false_positives, 2); } }