From dd9682747e50dd471133dd595a822b57b20f76f9 Mon Sep 17 00:00:00 2001 From: zzzxl1993 <474696115@qq.com> Date: Mon, 8 Jul 2024 11:27:53 +0800 Subject: [PATCH] [opt](inverted index) Optimization of the initialization process in topn --- .../olap/rowset/segment_v2/segment_iterator.cpp | 15 +++++++++------ be/src/olap/rowset/segment_v2/segment_iterator.h | 3 ++- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 82a3c8ba509b2d..de12ebb96c1b5b 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1942,7 +1942,8 @@ Status SegmentIterator::_read_columns(const std::vector& column_ids, } Status SegmentIterator::_init_current_block( - vectorized::Block* block, std::vector& current_columns) { + vectorized::Block* block, std::vector& current_columns, + uint32_t nrows_read_limit) { block->clear_column_data(_schema->num_column_ids()); for (size_t i = 0; i < _schema->num_column_ids(); i++) { @@ -1962,7 +1963,7 @@ Status SegmentIterator::_init_current_block( column_desc->path() == nullptr ? 
"" : column_desc->path()->get_path()); // TODO reuse current_columns[cid] = file_column_type->create_column(); - current_columns[cid]->reserve(_opts.block_row_max); + current_columns[cid]->reserve(nrows_read_limit); } else { // the column in block must clear() here to insert new data if (_is_pred_column[cid] || @@ -1981,7 +1982,7 @@ Status SegmentIterator::_init_current_block( } else if (column_desc->type() == FieldType::OLAP_FIELD_TYPE_DATETIME) { current_columns[cid]->set_datetime_type(); } - current_columns[cid]->reserve(_opts.block_row_max); + current_columns[cid]->reserve(nrows_read_limit); } } } @@ -2387,14 +2388,16 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { } } } - RETURN_IF_ERROR(_init_current_block(block, _current_return_columns)); - _converted_column_ids.assign(_schema->columns().size(), 0); - _current_batch_rows_read = 0; uint32_t nrows_read_limit = _opts.block_row_max; if (_can_opt_topn_reads()) { nrows_read_limit = std::min(static_cast<uint32_t>(_opts.topn_limit), nrows_read_limit); } + + RETURN_IF_ERROR(_init_current_block(block, _current_return_columns, nrows_read_limit)); + _converted_column_ids.assign(_schema->columns().size(), 0); + + _current_batch_rows_read = 0; RETURN_IF_ERROR(_read_columns_by_index( nrows_read_limit, _current_batch_rows_read, _lazy_materialization_read || _opts.record_rowids || _is_need_expr_eval)); diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index 5b89d1932fa262..d93f341b678644 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -222,7 +222,8 @@ class SegmentIterator : public RowwiseIterator { bool set_block_rowid); void _replace_version_col(size_t num_rows); Status _init_current_block(vectorized::Block* block, - std::vector& non_pred_vector); + std::vector& non_pred_vector, + uint32_t nrows_read_limit); uint16_t _evaluate_vectorization_predicate(uint16_t* 
sel_rowid_idx, uint16_t selected_size); uint16_t _evaluate_short_circuit_predicate(uint16_t* sel_rowid_idx, uint16_t selected_size); void _output_non_pred_columns(vectorized::Block* block);