From 58d37edcc3bfca8309d882b152f966e04b1f6679 Mon Sep 17 00:00:00 2001 From: Luc Rancourt Date: Tue, 7 Feb 2023 23:40:53 +0100 Subject: [PATCH] RLE and dictionary filter only enabled for UTF8 since format version 17. This fixes an issue in the filter pipeline where we should only skip offsets unfiltering for RLE/dictionary filters for UTF8 strings starting at version 17. --- TYPE: IMPROVEMENT DESC: RLE and dictionary filter only enabled for UTF8 since format version 17. --- tiledb/sm/filter/filter_pipeline.cc | 8 ++++---- tiledb/sm/query/readers/reader_base.cc | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tiledb/sm/filter/filter_pipeline.cc b/tiledb/sm/filter/filter_pipeline.cc index c937c65630b..92d4d4842ba 100644 --- a/tiledb/sm/filter/filter_pipeline.cc +++ b/tiledb/sm/filter/filter_pipeline.cc @@ -713,13 +713,13 @@ Status FilterPipeline::append_encryption_filter( bool FilterPipeline::skip_offsets_filtering( const Datatype type, const uint32_t version) const { - if (version >= 12 && - (type == Datatype::STRING_ASCII || type == Datatype::STRING_UTF8) && + if (((version >= 12 && type == Datatype::STRING_ASCII) || + (version >= 17 && type == Datatype::STRING_UTF8)) && has_filter(FilterType::FILTER_RLE)) { return true; } else if ( - version >= 13 && - (type == Datatype::STRING_ASCII || type == Datatype::STRING_UTF8) && + ((version >= 13 && type == Datatype::STRING_ASCII) || + (version >= 17 && type == Datatype::STRING_UTF8)) && has_filter(FilterType::FILTER_DICTIONARY)) { return true; } diff --git a/tiledb/sm/query/readers/reader_base.cc b/tiledb/sm/query/readers/reader_base.cc index 49e8686774d..341260adb4f 100644 --- a/tiledb/sm/query/readers/reader_base.cc +++ b/tiledb/sm/query/readers/reader_base.cc @@ -1409,7 +1409,8 @@ Status ReaderBase::unfilter_tile_nullable( // Reverse the tile filters. // If offsets don't need to be unfiltered separately, it means they // will be created on the fly from filtered var-length data - if (filters.skip_offsets_filtering(tile->type(), array_schema_.version())) { + if (filters.skip_offsets_filtering( + tile_var->type(), array_schema_.version())) { RETURN_NOT_OK(filters.run_reverse( stats_, tile_var,