From 5801d8d99e7fba5218147f6ac4d9d670f34e8ac6 Mon Sep 17 00:00:00 2001 From: Luwei <814383175@qq.com> Date: Wed, 17 Jul 2024 00:05:58 +0800 Subject: [PATCH 1/7] [fix](compaction) fix mismatch between segment key and value column rows during compaction --- .../rowset/vertical_beta_rowset_writer.cpp | 59 ++++++++++--------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp index 1de7d4f50dce8c..a73ba3a30fb7f6 100644 --- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp +++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp @@ -82,37 +82,38 @@ Status VerticalBetaRowsetWriter::add_columns(const vectorized::Block* block, } RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, 0, num_rows)); } else { - // value columns - uint32_t num_rows_written = _segment_writers[_cur_writer_idx]->num_rows_written(); - VLOG_NOTICE << "num_rows_written: " << num_rows_written - << ", _cur_writer_idx: " << _cur_writer_idx; - uint32_t num_rows_key_group = _segment_writers[_cur_writer_idx]->row_count(); - // init if it's first value column write in current segment - if (_cur_writer_idx == 0 && num_rows_written == 0) { - VLOG_NOTICE << "init first value column segment writer"; - RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids, is_key)); - } - // when splitting segment, need to make rows align between key columns and value columns size_t start_offset = 0; size_t limit = num_rows; - if (num_rows_written + num_rows >= num_rows_key_group && - _cur_writer_idx < _segment_writers.size() - 1) { - RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block( - block, 0, num_rows_key_group - num_rows_written)); - RETURN_IF_ERROR(_flush_columns(_segment_writers[_cur_writer_idx].get())); - start_offset = num_rows_key_group - num_rows_written; - limit = num_rows - start_offset; - ++_cur_writer_idx; - // switch to next writer - RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids, is_key)); - num_rows_written = 0; - num_rows_key_group = _segment_writers[_cur_writer_idx]->row_count(); - } - if (limit > 0) { - RETURN_IF_ERROR( - _segment_writers[_cur_writer_idx]->append_block(block, start_offset, limit)); - DCHECK(_segment_writers[_cur_writer_idx]->num_rows_written() <= - _segment_writers[_cur_writer_idx]->row_count()); + while (limit > 0) { + // value columns + uint32_t num_rows_written = _segment_writers[_cur_writer_idx]->num_rows_written(); + VLOG_NOTICE << "num_rows_written: " << num_rows_written + << ", _cur_writer_idx: " << _cur_writer_idx; + uint32_t num_rows_key_group = _segment_writers[_cur_writer_idx]->row_count(); + // init if it's first value column write in current segment + if (_cur_writer_idx == 0 && num_rows_written == 0) { + VLOG_NOTICE << "init first value column segment writer"; + RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids, is_key)); + } + // when splitting segment, need to make rows align between key columns and value columns + if (num_rows_written + limit >= num_rows_key_group && + _cur_writer_idx < _segment_writers.size() - 1) { + RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block( + block, 0, num_rows_key_group - num_rows_written)); + RETURN_IF_ERROR(_flush_columns(_segment_writers[_cur_writer_idx].get())); + start_offset += (num_rows_key_group - num_rows_written); + limit = num_rows - start_offset; + ++_cur_writer_idx; + // switch to next writer + RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids, is_key)); + num_rows_written = 0; + num_rows_key_group = _segment_writers[_cur_writer_idx]->row_count(); + } else { + RETURN_IF_ERROR( + _segment_writers[_cur_writer_idx]->append_block(block, start_offset, limit)); + CHECK(_segment_writers[_cur_writer_idx]->num_rows_written() <= + _segment_writers[_cur_writer_idx]->row_count()); + } } } if (is_key) { From c5ec39e536e98368b03459f8b39655c13269748b Mon Sep 17 00:00:00 2001 From: Luwei <814383175@qq.com> Date: Wed, 17 Jul 2024 00:10:28 +0800 Subject: [PATCH 2/7] fix code style --- be/src/olap/rowset/vertical_beta_rowset_writer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp index a73ba3a30fb7f6..f3b6381d29fc17 100644 --- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp +++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp @@ -109,10 +109,10 @@ Status VerticalBetaRowsetWriter::add_columns(const vectorized::Block* block, num_rows_written = 0; num_rows_key_group = _segment_writers[_cur_writer_idx]->row_count(); } else { - RETURN_IF_ERROR( - _segment_writers[_cur_writer_idx]->append_block(block, start_offset, limit)); + RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, start_offset, + limit)); CHECK(_segment_writers[_cur_writer_idx]->num_rows_written() <= - _segment_writers[_cur_writer_idx]->row_count()); + _segment_writers[_cur_writer_idx]->row_count()); } } } From bf709f55be03f0785e24b380d8a8c16d08e06aec Mon Sep 17 00:00:00 2001 From: Luwei <814383175@qq.com> Date: Wed, 17 Jul 2024 00:51:36 +0800 Subject: [PATCH 3/7] fix --- be/src/olap/rowset/vertical_beta_rowset_writer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp index f3b6381d29fc17..4a4d2bcdc88236 100644 --- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp +++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp @@ -113,6 +113,7 @@ Status VerticalBetaRowsetWriter::add_columns(const vectorized::Block* block, limit)); CHECK(_segment_writers[_cur_writer_idx]->num_rows_written() <= _segment_writers[_cur_writer_idx]->row_count()); + break; } } } From 6a2dac9dea15e46333dbde944236f6835e517bd7 Mon Sep 17 00:00:00 2001 From: Luwei <814383175@qq.com> Date: Fri, 19 Jul 2024 16:56:03 +0800 Subject: [PATCH 4/7] fix --- .../rowset/vertical_beta_rowset_writer.cpp | 36 ++++++++----------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp index 4a4d2bcdc88236..178eb1fbbfe5c1 100644 --- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp +++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp @@ -82,38 +82,30 @@ Status VerticalBetaRowsetWriter::add_columns(const vectorized::Block* block, } RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, 0, num_rows)); } else { - size_t start_offset = 0; - size_t limit = num_rows; - while (limit > 0) { + size_t left = num_rows; + while (left > 0) { // value columns uint32_t num_rows_written = _segment_writers[_cur_writer_idx]->num_rows_written(); VLOG_NOTICE << "num_rows_written: " << num_rows_written << ", _cur_writer_idx: " << _cur_writer_idx; uint32_t num_rows_key_group = _segment_writers[_cur_writer_idx]->row_count(); + CHECK(num_rows_written <= num_rows_key_group); // init if it's first value column write in current segment - if (_cur_writer_idx == 0 && num_rows_written == 0) { + if (num_rows_written == 0) { VLOG_NOTICE << "init first value column segment writer"; RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids, is_key)); } - // when splitting segment, need to make rows align between key columns and value columns - if (num_rows_written + limit >= num_rows_key_group && - _cur_writer_idx < _segment_writers.size() - 1) { - RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block( - block, 0, num_rows_key_group - num_rows_written)); - RETURN_IF_ERROR(_flush_columns(_segment_writers[_cur_writer_idx].get())); - start_offset += (num_rows_key_group - num_rows_written); - limit = num_rows - start_offset; + + int64_t to_write = num_rows_written + left >= num_rows_key_group + ? num_rows_key_group - num_rows_written + : left; + RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, num_rows - left, + to_write)); + left -= to_write; + CHECK(left >= 0); + + if (left > 0) { ++_cur_writer_idx; - // switch to next writer - RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->init(col_ids, is_key)); - num_rows_written = 0; - num_rows_key_group = _segment_writers[_cur_writer_idx]->row_count(); - } else { - RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, start_offset, - limit)); - CHECK(_segment_writers[_cur_writer_idx]->num_rows_written() <= - _segment_writers[_cur_writer_idx]->row_count()); - break; } } } From b2fbd6818d335f8e5c6c8d0d9e67e7b693e2cdde Mon Sep 17 00:00:00 2001 From: Luwei <814383175@qq.com> Date: Fri, 19 Jul 2024 16:57:06 +0800 Subject: [PATCH 5/7] fix --- be/src/olap/rowset/vertical_beta_rowset_writer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp index 178eb1fbbfe5c1..cd925dd44b8ee5 100644 --- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp +++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp @@ -82,9 +82,9 @@ Status VerticalBetaRowsetWriter::add_columns(const vectorized::Block* block, } RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, 0, num_rows)); } else { + // value columns size_t left = num_rows; while (left > 0) { - // value columns uint32_t num_rows_written = _segment_writers[_cur_writer_idx]->num_rows_written(); VLOG_NOTICE << "num_rows_written: " << num_rows_written << ", _cur_writer_idx: " << _cur_writer_idx; From 626ce560cbc6978a543fed1baf869e41bce3cf76 Mon Sep 17 00:00:00 2001 From: Luwei <814383175@qq.com> Date: Fri, 19 Jul 2024 17:00:29 +0800 Subject: [PATCH 6/7] fix --- be/src/olap/rowset/vertical_beta_rowset_writer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp index cd925dd44b8ee5..24319ff984c422 100644 --- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp +++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp @@ -97,8 +97,8 @@ Status VerticalBetaRowsetWriter::add_columns(const vectorized::Block* block, } int64_t to_write = num_rows_written + left >= num_rows_key_group - ? num_rows_key_group - num_rows_written - : left; + ? num_rows_key_group - num_rows_written + : left; RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, num_rows - left, to_write)); left -= to_write; From 405b8ade00fd57bd0d65489a69e93b2c87f73a4e Mon Sep 17 00:00:00 2001 From: Luwei <814383175@qq.com> Date: Sat, 20 Jul 2024 11:34:20 +0800 Subject: [PATCH 7/7] fix compile --- be/src/olap/rowset/vertical_beta_rowset_writer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp index 24319ff984c422..942ced616fcaae 100644 --- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp +++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp @@ -83,13 +83,13 @@ Status VerticalBetaRowsetWriter::add_columns(const vectorized::Block* block, RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, 0, num_rows)); } else { // value columns - size_t left = num_rows; + int64_t left = num_rows; while (left > 0) { uint32_t num_rows_written = _segment_writers[_cur_writer_idx]->num_rows_written(); VLOG_NOTICE << "num_rows_written: " << num_rows_written << ", _cur_writer_idx: " << _cur_writer_idx; uint32_t num_rows_key_group = _segment_writers[_cur_writer_idx]->row_count(); - CHECK(num_rows_written <= num_rows_key_group); + CHECK_LE(num_rows_written, num_rows_key_group); // init if it's first value column write in current segment if (num_rows_written == 0) { VLOG_NOTICE << "init first value column segment writer"; @@ -102,7 +102,7 @@ Status VerticalBetaRowsetWriter::add_columns(const vectorized::Block* block, RETURN_IF_ERROR(_segment_writers[_cur_writer_idx]->append_block(block, num_rows - left, to_write)); left -= to_write; - CHECK(left >= 0); + CHECK_GE(left, 0); if (left > 0) { ++_cur_writer_idx;