Skip to content

Commit

Permalink
Context-free "filtered" keyword semantics (#1513)
Browse files Browse the repository at this point in the history
Currently, both the Tile::filtered_ and the query context (e.g. in the read or
write path) are required to determine if the Tile::buffer_ contains the on-disk
format. For example: when 'filtered_' is true in the write path, 'buffer_'
contains the on-disk format. When 'filtered_' is true in the read path,
'buffer_' does not contain the on-disk format.

This patch changes the semantics of the "filtered" to mean "filtered, on-disk
formatted" and "unfiltered" to mean "reverse-filtered, in-memory formatted".

This may be a useful cleanup, but the motiviation for this patch is to prepare
for the vectorized tile buffer patch:

'void* Tile::buffer_' will be removed in favor of 'ChunkedBuffer*
Tile::chunked_buffer_' and 'void* Tile::filtered_buffer_'. The 'chunked_buffer_'
contains the "reverse-filtered, in-memory formatted" data. The 'filtered_buffer_'
contains the "filtered, on-disk formatted" data.

With the new semantics of the "filtered" keyword, 'bool Tile::filtered_' becomes
unneccessary because "bool Tile::filtered()" can be implemented by determing
which of the two buffers ('chunked_buffer_' or 'filtered_buffer_') are non-empty.

In other words: the state of Tile will become more complex to support two
separate buffers. This patch is a pre-requisite to reducing some of that state
by allowing us to eliminate "bool Tile::filtered_".
  • Loading branch information
joe maley authored Feb 20, 2020
1 parent 6d6dce5 commit 8f09548
Show file tree
Hide file tree
Showing 9 changed files with 89 additions and 60 deletions.
8 changes: 7 additions & 1 deletion test/src/unit-Tile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,13 @@ TEST_CASE("Tile: Test basic read", "[Tile][basic_read]") {
const Datatype data_type = Datatype::UINT32;
const uint64_t cell_size = 0;
const unsigned int dim_num = 0;
CHECK(tile.init(format_version, data_type, cell_size, dim_num).ok());
CHECK(tile.init(
format_version,
data_type,
cell_size,
dim_num,
true /* filtered */)
.ok());

// Create a buffer to write to the test Tile.
const uint32_t buffer_len = 128;
Expand Down
2 changes: 1 addition & 1 deletion tiledb/sm/misc/stats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ void Statistics::dump_read_summary(FILE* out) const {
out,
" Read compression ratio",
"bytes",
counter_reader_num_bytes_after_filtering +
counter_reader_num_bytes_after_unfiltering +
counter_tileio_read_num_resulting_bytes,
counter_reader_num_tile_bytes_read + counter_tileio_read_num_bytes_read);
}
Expand Down
12 changes: 6 additions & 6 deletions tiledb/sm/misc/stats_counters.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,13 @@ STATS_DEFINE_FUNC_STAT(reader_copy_var_cells)
STATS_DEFINE_FUNC_STAT(reader_dedup_coords)
STATS_DEFINE_FUNC_STAT(reader_dense_read)
STATS_DEFINE_FUNC_STAT(reader_fill_coords)
STATS_DEFINE_FUNC_STAT(reader_filter_tiles)
STATS_DEFINE_FUNC_STAT(reader_init_tile_fragment_dense_cell_range_iters)
STATS_DEFINE_FUNC_STAT(reader_next_subarray_partition)
STATS_DEFINE_FUNC_STAT(reader_read)
STATS_DEFINE_FUNC_STAT(reader_read_all_tiles)
STATS_DEFINE_FUNC_STAT(reader_sort_coords)
STATS_DEFINE_FUNC_STAT(reader_sparse_read)
STATS_DEFINE_FUNC_STAT(reader_unfilter_tiles)
// Writer
STATS_DEFINE_FUNC_STAT(writer_check_coord_dups)
STATS_DEFINE_FUNC_STAT(writer_check_coord_dups_global)
Expand Down Expand Up @@ -195,13 +195,13 @@ STATS_INIT_FUNC_STAT(reader_copy_var_cells)
STATS_INIT_FUNC_STAT(reader_dedup_coords)
STATS_INIT_FUNC_STAT(reader_dense_read)
STATS_INIT_FUNC_STAT(reader_fill_coords)
STATS_INIT_FUNC_STAT(reader_filter_tiles)
STATS_INIT_FUNC_STAT(reader_init_tile_fragment_dense_cell_range_iters)
STATS_INIT_FUNC_STAT(reader_next_subarray_partition)
STATS_INIT_FUNC_STAT(reader_read)
STATS_INIT_FUNC_STAT(reader_read_all_tiles)
STATS_INIT_FUNC_STAT(reader_sort_coords)
STATS_INIT_FUNC_STAT(reader_sparse_read)
STATS_INIT_FUNC_STAT(reader_unfilter_tiles)
// Writer
STATS_INIT_FUNC_STAT(writer_check_coord_dups)
STATS_INIT_FUNC_STAT(writer_check_coord_dups_global)
Expand Down Expand Up @@ -322,13 +322,13 @@ STATS_REPORT_FUNC_STAT(reader_copy_var_cells)
STATS_REPORT_FUNC_STAT(reader_dedup_coords)
STATS_REPORT_FUNC_STAT(reader_dense_read)
STATS_REPORT_FUNC_STAT(reader_fill_coords)
STATS_REPORT_FUNC_STAT(reader_filter_tiles)
STATS_REPORT_FUNC_STAT(reader_init_tile_fragment_dense_cell_range_iters)
STATS_REPORT_FUNC_STAT(reader_next_subarray_partition)
STATS_REPORT_FUNC_STAT(reader_read)
STATS_REPORT_FUNC_STAT(reader_read_all_tiles)
STATS_REPORT_FUNC_STAT(reader_sort_coords)
STATS_REPORT_FUNC_STAT(reader_sparse_read)
STATS_REPORT_FUNC_STAT(reader_unfilter_tiles)
// Writer
STATS_REPORT_FUNC_STAT(writer_check_coord_dups)
STATS_REPORT_FUNC_STAT(writer_check_coord_dups_global)
Expand Down Expand Up @@ -427,7 +427,7 @@ STATS_DEFINE_COUNTER_STAT(fragment_metadata_cache_read_misses)
// Reader
STATS_DEFINE_COUNTER_STAT(reader_attr_tile_cache_hits)
STATS_DEFINE_COUNTER_STAT(reader_num_attr_tiles_touched)
STATS_DEFINE_COUNTER_STAT(reader_num_bytes_after_filtering)
STATS_DEFINE_COUNTER_STAT(reader_num_bytes_after_unfiltering)
STATS_DEFINE_COUNTER_STAT(reader_num_fixed_cell_bytes_copied)
STATS_DEFINE_COUNTER_STAT(reader_num_fixed_cell_bytes_read)
STATS_DEFINE_COUNTER_STAT(reader_num_tile_bytes_read)
Expand Down Expand Up @@ -477,7 +477,7 @@ STATS_INIT_COUNTER_STAT(fragment_metadata_cache_read_misses)
// Reader
STATS_INIT_COUNTER_STAT(reader_attr_tile_cache_hits)
STATS_INIT_COUNTER_STAT(reader_num_attr_tiles_touched)
STATS_INIT_COUNTER_STAT(reader_num_bytes_after_filtering)
STATS_INIT_COUNTER_STAT(reader_num_bytes_after_unfiltering)
STATS_INIT_COUNTER_STAT(reader_num_fixed_cell_bytes_copied)
STATS_INIT_COUNTER_STAT(reader_num_fixed_cell_bytes_read)
STATS_INIT_COUNTER_STAT(reader_num_tile_bytes_read)
Expand Down Expand Up @@ -527,7 +527,7 @@ STATS_REPORT_COUNTER_STAT(fragment_metadata_cache_read_misses)
// Reader
STATS_REPORT_COUNTER_STAT(reader_attr_tile_cache_hits)
STATS_REPORT_COUNTER_STAT(reader_num_attr_tiles_touched)
STATS_REPORT_COUNTER_STAT(reader_num_bytes_after_filtering)
STATS_REPORT_COUNTER_STAT(reader_num_bytes_after_unfiltering)
STATS_REPORT_COUNTER_STAT(reader_num_fixed_cell_bytes_copied)
STATS_REPORT_COUNTER_STAT(reader_num_fixed_cell_bytes_read)
STATS_REPORT_COUNTER_STAT(reader_num_tile_bytes_read)
Expand Down
66 changes: 37 additions & 29 deletions tiledb/sm/query/reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1292,23 +1292,23 @@ Status Reader::compute_result_coords(
return Status::Ok();

// Create temporary vector with pointers to result tiles, so that
// `read_tiles`, `filter_tiles` below can work without changes
// `read_tiles`, `unfilter_tiles` below can work without changes
std::vector<ResultTile*> tmp_result_tiles;
for (auto& result_tile : *result_tiles)
tmp_result_tiles.push_back(&result_tile);

// Read and filter coordinate tiles
// Read and unfilter coordinate tiles
// NOTE: these will ignore tiles of fragments with format version >=5
RETURN_CANCEL_OR_ERROR(read_tiles(constants::coords, tmp_result_tiles));
RETURN_CANCEL_OR_ERROR(filter_tiles(constants::coords, tmp_result_tiles));
RETURN_CANCEL_OR_ERROR(unfilter_tiles(constants::coords, tmp_result_tiles));

// Read and filter coordinate tiles
// Read and unfilter coordinate tiles
// NOTE: these will ignore tiles of fragments with format version <5
auto dim_num = array_schema_->dim_num();
for (unsigned d = 0; d < dim_num; ++d) {
const auto& dim_name = array_schema_->dimension(d)->name();
RETURN_CANCEL_OR_ERROR(read_tiles(dim_name, tmp_result_tiles));
RETURN_CANCEL_OR_ERROR(filter_tiles(dim_name, tmp_result_tiles));
RETURN_CANCEL_OR_ERROR(unfilter_tiles(dim_name, tmp_result_tiles));
}

// Compute the read coordinates for all fragments for each subarray range
Expand Down Expand Up @@ -1394,7 +1394,7 @@ Status Reader::dense_read() {
continue;

RETURN_CANCEL_OR_ERROR(read_tiles(attr, result_tiles));
RETURN_CANCEL_OR_ERROR(filter_tiles(attr, result_tiles));
RETURN_CANCEL_OR_ERROR(unfilter_tiles(attr, result_tiles));
RETURN_CANCEL_OR_ERROR(copy_cells(attr, stride, result_cell_slabs));
clear_tiles(attr, result_tiles);
}
Expand Down Expand Up @@ -1537,10 +1537,10 @@ void Reader::fill_dense_coords_col_slab(
}
}

Status Reader::filter_tiles(
Status Reader::unfilter_tiles(
const std::string& name,
const std::vector<ResultTile*>& result_tiles) const {
STATS_FUNC_IN(reader_filter_tiles);
STATS_FUNC_IN(reader_unfilter_tiles);

auto var_size = array_schema_->var_size(name);
auto num_tiles = static_cast<uint64_t>(result_tiles.size());
Expand Down Expand Up @@ -1573,21 +1573,21 @@ Status Reader::filter_tiles(
auto& t = tile_pair->first;
auto& t_var = tile_pair->second;

if (!t.filtered()) {
if (t.filtered()) {
// Decompress, etc.
RETURN_NOT_OK(filter_tile(name, &t, var_size));
RETURN_NOT_OK(unfilter_tile(name, &t, var_size));
RETURN_NOT_OK(storage_manager_->write_to_cache(
tile_attr_uri, tile_attr_offset, t.buffer()));
}

if (var_size && !t_var.filtered()) {
if (var_size && t_var.filtered()) {
auto tile_attr_var_uri = fragment->var_uri(name);
uint64_t tile_attr_var_offset;
RETURN_NOT_OK(fragment->file_var_offset(
*encryption_key, name, tile_idx, &tile_attr_var_offset));

// Decompress, etc.
RETURN_NOT_OK(filter_tile(name, &t_var, false));
RETURN_NOT_OK(unfilter_tile(name, &t_var, false));
RETURN_NOT_OK(storage_manager_->write_to_cache(
tile_attr_var_uri, tile_attr_var_offset, t_var.buffer()));
}
Expand All @@ -1601,28 +1601,25 @@ Status Reader::filter_tiles(

return Status::Ok();

STATS_FUNC_OUT(reader_filter_tiles);
STATS_FUNC_OUT(reader_unfilter_tiles);
}

Status Reader::filter_tile(
Status Reader::unfilter_tile(
const std::string& name, Tile* tile, bool offsets) const {
uint64_t orig_size = tile->buffer()->size();

// Get a copy of the appropriate filter pipeline.
// Get a copy of the appropriate unfilter pipeline.
FilterPipeline filters =
(offsets ? *array_schema_->cell_var_offsets_filters() :
*array_schema_->filters(name));

// Append an encryption filter when necessary.
// Append an encryption unfilter when necessary.
RETURN_NOT_OK(FilterPipeline::append_encryption_filter(
&filters, array_->get_encryption_key()));

RETURN_NOT_OK(filters.run_reverse(tile));

tile->set_filtered(true);
tile->set_pre_filtered_size(orig_size);
tile->set_filtered(false);

STATS_COUNTER_ADD(reader_num_bytes_after_filtering, tile->size());
STATS_COUNTER_ADD(reader_num_bytes_after_unfiltering, tile->size());

return Status::Ok();
}
Expand Down Expand Up @@ -1704,8 +1701,13 @@ Status Reader::init_tile(
auto tile_size = cell_num_per_tile * cell_size;

// Initialize
RETURN_NOT_OK(
tile->init(format_version, type, tile_size, cell_size, dim_num));
RETURN_NOT_OK(tile->init(
format_version,
type,
tile_size,
cell_size,
dim_num,
true /* filtered */));

return Status::Ok();
}
Expand All @@ -1729,9 +1731,15 @@ Status Reader::init_tile(
constants::cell_var_offset_type,
tile_size,
constants::cell_var_offset_size,
0));
RETURN_NOT_OK(
tile_var->init(format_version, type, tile_size, datatype_size(type), 0));
0,
true /* filtered */));
RETURN_NOT_OK(tile_var->init(
format_version,
type,
tile_size,
datatype_size(type),
0,
true /* filtered */));
return Status::Ok();
}

Expand Down Expand Up @@ -1818,7 +1826,7 @@ Status Reader::read_tiles(
RETURN_NOT_OK(storage_manager_->read_from_cache(
tile_attr_uri, tile_attr_offset, t.buffer(), tile_size, &cache_hit));
if (cache_hit) {
t.set_filtered(true);
t.set_filtered(false);
STATS_COUNTER_ADD(reader_attr_tile_cache_hits, 1);
} else {
// Add the region of the fragment to be read.
Expand Down Expand Up @@ -1851,7 +1859,7 @@ Status Reader::read_tiles(
&cache_hit));

if (cache_hit) {
t_var.set_filtered(true);
t_var.set_filtered(false);
STATS_COUNTER_ADD(reader_attr_tile_cache_hits, 1);
} else {
// Add the region of the fragment to be read.
Expand Down Expand Up @@ -1960,7 +1968,7 @@ Status Reader::sparse_read() {
continue;

RETURN_CANCEL_OR_ERROR(read_tiles(attr, result_tiles));
RETURN_CANCEL_OR_ERROR(filter_tiles(attr, result_tiles));
RETURN_CANCEL_OR_ERROR(unfilter_tiles(attr, result_tiles));
RETURN_CANCEL_OR_ERROR(copy_cells(attr, stride, result_cell_slabs));
clear_tiles(attr, result_tiles);
}
Expand Down
12 changes: 6 additions & 6 deletions tiledb/sm/query/reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -858,11 +858,11 @@ class Reader {
* Filters the tiles on a particular attribute/dimension from all input
* fragments based on the tile info in `result_tiles`.
*
* @param name Attribute/dimension whose tiles will be filtered
* @param result_tiles Vector containing the tiles to be filtered
* @param name Attribute/dimension whose tiles will be unfiltered
* @param result_tiles Vector containing the tiles to be unfiltered
* @return Status
*/
Status filter_tiles(
Status unfilter_tiles(
const std::string& name,
const std::vector<ResultTile*>& result_tiles) const;

Expand All @@ -872,12 +872,12 @@ class Reader {
* pipeline.
*
* @param name The attribute/dimension the tile belong to.
* @param tile The tile to be filtered.
* @param offsets True if the tile to be filtered contains offsets for a
* @param tile The tile to be unfiltered.
* @param offsets True if the tile to be unfiltered contains offsets for a
* var-sized attribute/dimension.
* @return Status
*/
Status filter_tile(const std::string& name, Tile* tile, bool offsets) const;
Status unfilter_tile(const std::string& name, Tile* tile, bool offsets) const;

/**
* Gets all the result coordinates of the input tile into `result_coords`.
Expand Down
19 changes: 15 additions & 4 deletions tiledb/sm/query/writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1382,8 +1382,13 @@ Status Writer::init_tile(const std::string& name, Tile* tile) const {
auto tile_size = cell_num_per_tile * cell_size;

// Initialize
RETURN_NOT_OK(
tile->init(constants::format_version, type, tile_size, cell_size, 0));
RETURN_NOT_OK(tile->init(
constants::format_version,
type,
tile_size,
cell_size,
0,
false /* filtered */));

return Status::Ok();
}
Expand All @@ -1403,9 +1408,15 @@ Status Writer::init_tile(
constants::cell_var_offset_type,
tile_size,
constants::cell_var_offset_size,
0));
0,
false /* filtered */));
RETURN_NOT_OK(tile_var->init(
constants::format_version, type, tile_size, datatype_size(type), 0));
constants::format_version,
type,
tile_size,
datatype_size(type),
0,
false /* filtered */));
return Status::Ok();
}

Expand Down
8 changes: 6 additions & 2 deletions tiledb/sm/tile/tile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -132,11 +132,13 @@ Status Tile::init(
uint32_t format_version,
Datatype type,
uint64_t cell_size,
unsigned int dim_num) {
unsigned int dim_num,
bool filtered) {
cell_size_ = cell_size;
dim_num_ = dim_num;
type_ = type;
format_version_ = format_version;
filtered_ = filtered;

buffer_ = new Buffer();
if (buffer_ == nullptr)
Expand All @@ -151,11 +153,13 @@ Status Tile::init(
Datatype type,
uint64_t tile_size,
uint64_t cell_size,
unsigned int dim_num) {
unsigned int dim_num,
bool filtered) {
cell_size_ = cell_size;
dim_num_ = dim_num;
type_ = type;
format_version_ = format_version;
filtered_ = filtered;

buffer_ = new Buffer();
if (buffer_ == nullptr)
Expand Down
Loading

0 comments on commit 8f09548

Please sign in to comment.