Skip to content

Commit

Permalink
add deserialize test
Browse files: browse the repository at this point in the history
  • Loading branch information
mapleFU committed Nov 24, 2023
1 parent f7535cc commit e5875e9
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 19 deletions.
12 changes: 8 additions & 4 deletions cpp/src/parquet/bloom_filter_reader.cc
Original file line number | Diff line number | Diff line change
Expand Up @@ -64,10 +64,14 @@ std::unique_ptr<BloomFilter> RowGroupBloomFilterReaderImpl::GetColumnBloomFilter
throw ParquetException("file size less or equal than bloom offset");
}
std::optional<int64_t> bloom_filter_length = col_chunk->bloom_filter_length();
if (bloom_filter_length.has_value() &&
*bloom_filter_length + *bloom_filter_offset > file_size) {
throw ParquetException(
"bloom filter length + bloom filter offset greater than file size");
if (bloom_filter_length.has_value()) {
if (*bloom_filter_length < 0) {
throw ParquetException("bloom_filter_length less than 0");
}
if (*bloom_filter_length + *bloom_filter_offset > file_size) {
throw ParquetException(
"bloom filter length + bloom filter offset greater than file size");
}
}
auto stream = ::arrow::io::RandomAccessFile::GetStream(
input_, *bloom_filter_offset, file_size - *bloom_filter_offset);
Expand Down
32 changes: 17 additions & 15 deletions cpp/src/parquet/bloom_filter_test.cc
Original file line number | Diff line number | Diff line change
Expand Up @@ -107,24 +107,26 @@ TEST(BasicTest, TestBloomFilter) {

// Deserialize Bloom filter from memory
ASSERT_OK_AND_ASSIGN(auto buffer, sink->Finish());
::arrow::io::BufferReader source(buffer);

ReaderProperties reader_properties;
BlockSplitBloomFilter de_bloom =
BlockSplitBloomFilter::Deserialize(reader_properties, &source, std::nullopt);

// Lookup previously inserted values
for (const auto v : kIntInserts) {
EXPECT_TRUE(de_bloom.FindHash(de_bloom.Hash(v)));
}
for (const auto v : kFloatInserts) {
EXPECT_TRUE(de_bloom.FindHash(de_bloom.Hash(v)));
for (std::optional<int64_t> bloom_filter_length :
std::vector<std::optional<int64_t>>{std::nullopt, buffer->size()}) {
::arrow::io::BufferReader source(buffer);
BlockSplitBloomFilter de_bloom = BlockSplitBloomFilter::Deserialize(
reader_properties, &source, bloom_filter_length);
// Lookup previously inserted values
for (const auto v : kIntInserts) {
EXPECT_TRUE(de_bloom.FindHash(de_bloom.Hash(v)));
}
for (const auto v : kFloatInserts) {
EXPECT_TRUE(de_bloom.FindHash(de_bloom.Hash(v)));
}
false_positives = 0;
for (const auto v : kNegativeIntLookups) {
false_positives += de_bloom.FindHash(de_bloom.Hash(v));
}
EXPECT_LE(false_positives, 2);
}
false_positives = 0;
for (const auto v : kNegativeIntLookups) {
false_positives += de_bloom.FindHash(de_bloom.Hash(v));
}
EXPECT_LE(false_positives, 2);
}
}

Expand Down

0 comments on commit e5875e9

Please sign in to comment.