From de720f02db51247d56184688e04c4eaa3e48be47 Mon Sep 17 00:00:00 2001 From: wangbo <506340561@qq.com> Date: Sat, 26 Oct 2019 14:58:41 +0800 Subject: [PATCH] (#2037) 1,fix mem leak when calling string builder.get_dictionary_page; 2, fix delete invalid mem addr in bitshufflleBuilder when no array grow happends --- .../rowset/segment_v2/binary_dict_page.cpp | 9 +++--- .../olap/rowset/segment_v2/binary_dict_page.h | 15 +++++++++ .../rowset/segment_v2/binary_plain_page.h | 32 +++++++++++++------ .../olap/rowset/segment_v2/bitshuffle_page.h | 14 ++++++-- .../olap/rowset/segment_v2/column_writer.cpp | 7 ++-- be/src/olap/rowset/segment_v2/page_builder.h | 11 +++++++ be/src/olap/rowset/segment_v2/rle_page.h | 26 +++++++++++---- 7 files changed, 88 insertions(+), 26 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp index 61c9e360c61399b..3a72c620bfd0b72 100644 --- a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp +++ b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp @@ -100,12 +100,11 @@ Status BinaryDictPageBuilder::add(const uint8_t* vals, size_t* count) { } Slice BinaryDictPageBuilder::finish() { - _finished = true; + return _finish(FINISH); +} - Slice data_slice = _data_page_builder->finish(); - _buffer.append(data_slice.data, data_slice.size); - encode_fixed32_le(&_buffer[0], _encoding_type); - return Slice(_buffer); +Slice BinaryDictPageBuilder::finish_and_release() { + return _finish(RELEASE); } void BinaryDictPageBuilder::reset() { diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.h b/be/src/olap/rowset/segment_v2/binary_dict_page.h index 6a2ef32a9295375..048c4eecb7aa6d7 100644 --- a/be/src/olap/rowset/segment_v2/binary_dict_page.h +++ b/be/src/olap/rowset/segment_v2/binary_dict_page.h @@ -77,7 +77,22 @@ class BinaryDictPageBuilder : public PageBuilder { (void)ret; } + Slice finish_and_release() override; + private: + Slice _finish(BuilderReturnType builderReturnType) { + _finished = true; + + Slice data_slice = _data_page_builder->finish(); + _buffer.append(data_slice.data, data_slice.size); + encode_fixed32_le(&_buffer[0], _encoding_type); + if (builderReturnType == FINISH) { + return Slice(_buffer); + } else { + return Slice(_buffer.release(), _buffer.size()); + } + } + PageBuilderOptions _options; bool _finished; diff --git a/be/src/olap/rowset/segment_v2/binary_plain_page.h b/be/src/olap/rowset/segment_v2/binary_plain_page.h index b442eb85e38740a..3a2b9be0bb7a487 100644 --- a/be/src/olap/rowset/segment_v2/binary_plain_page.h +++ b/be/src/olap/rowset/segment_v2/binary_plain_page.h @@ -79,15 +79,7 @@ class BinaryPlainPageBuilder : public PageBuilder { } Slice finish() override { - _finished = true; - - // Set up trailer - for (int i = 0; i < _offsets.size(); i++) { - put_fixed32_le(&_buffer, _offsets[i]); - } - put_fixed32_le(&_buffer, _offsets.size()); - - return Slice(_buffer); + return _finish(FINISH); } void reset() override { @@ -117,12 +109,34 @@ class BinaryPlainPageBuilder : public PageBuilder { (void) ret; } + Slice finish_and_release() override { + return _finish(RELEASE); + } + void update_prepared_size(size_t added_size) { _prepared_size += added_size; _prepared_size += sizeof(uint32_t); } private: + Slice _finish(BuilderReturnType returnType) { + _finished = true; + + // Set up trailer + for (int i = 0; i < _offsets.size(); i++) { + put_fixed32_le(&_buffer, _offsets[i]); + } + put_fixed32_le(&_buffer, _offsets.size()); + + if (returnType == FINISH) { + return Slice(_buffer); + } else { + Slice slice = Slice(_buffer.release(), _buffer.size()); + _buffer.reserve(_options.data_page_size); + return slice; + } + } + faststring _buffer; size_t _size_estimate; size_t _prepared_size; diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.h b/be/src/olap/rowset/segment_v2/bitshuffle_page.h index 1573d681f008edc..08349ac5d66795b 100644 --- a/be/src/olap/rowset/segment_v2/bitshuffle_page.h +++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.h @@ -106,7 +106,7 @@ class BitshufflePageBuilder : public PageBuilder { } Slice finish() override { - return _finish(SIZE_OF_TYPE); + return _finish(SIZE_OF_TYPE, FINISH); } void reset() override { @@ -139,8 +139,12 @@ class BitshufflePageBuilder : public PageBuilder { (void)ret; } + Slice finish_and_release() override { + return _finish(SIZE_OF_TYPE, RELEASE); + } + private: - Slice _finish(int final_size_of_type) { + Slice _finish(int final_size_of_type, BuilderReturnType builderReturnType) { _data.resize(final_size_of_type * _count); // Do padding so that the input num of element is multiple of 8. @@ -169,7 +173,11 @@ class BitshufflePageBuilder : public PageBuilder { encode_fixed32_le(&_buffer[8], num_elems_after_padding); encode_fixed32_le(&_buffer[12], final_size_of_type); _finished = true; - return Slice(_buffer.data(), BITSHUFFLE_PAGE_HEADER_SIZE + bytes); + if (builderReturnType == FINISH) { + return Slice(_buffer.data(), BITSHUFFLE_PAGE_HEADER_SIZE + bytes); + } else { + return Slice(_buffer.release(), BITSHUFFLE_PAGE_HEADER_SIZE + bytes); + } } typedef typename TypeTraits::CppType CppType; diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index f14911e3007e48b..a53203cad129521 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -219,7 +219,9 @@ Status ColumnWriter::write_data() { _page_builder->get_dictionary_page(&dict_page); std::vector origin_data; origin_data.push_back(dict_page); - RETURN_IF_ERROR(_write_physical_page(&origin_data, &_dict_page_pp)); + Status ret = _write_physical_page(&origin_data, &_dict_page_pp); + delete[] dict_page.data; + RETURN_IF_ERROR(ret); } return Status::OK(); } @@ -328,8 +330,7 @@ Status ColumnWriter::_finish_current_page() { Page* page = new Page(); page->first_rowid = _last_first_rowid; page->num_rows = _next_rowid - _last_first_rowid; - page->data = _page_builder->finish(); - _page_builder->release(); + page->data = _page_builder->finish_and_release(); _page_builder->reset(); if (_is_nullable) { page->null_bitmap = _null_bitmap_builder->finish(); diff --git a/be/src/olap/rowset/segment_v2/page_builder.h b/be/src/olap/rowset/segment_v2/page_builder.h index c2cc0eb81360897..4a567a689360482 100644 --- a/be/src/olap/rowset/segment_v2/page_builder.h +++ b/be/src/olap/rowset/segment_v2/page_builder.h @@ -28,6 +28,11 @@ namespace doris { namespace segment_v2 { +enum BuilderReturnType { + FINISH = 1, + RELEASE = 2, +}; + // PageBuilder is used to build page // Page is a data management unit, including: // 1. Data Page: store encoded and compressed data @@ -79,6 +84,12 @@ class PageBuilder { // and should be followed by reset() before reuse the builder virtual void release() = 0; + // Return a Slice which represents the encoded data of current page,and release the slice + // caller need to delete the returned slice + virtual Slice finish_and_release() { + return nullptr; + }; + private: DISALLOW_COPY_AND_ASSIGN(PageBuilder); }; diff --git a/be/src/olap/rowset/segment_v2/rle_page.h b/be/src/olap/rowset/segment_v2/rle_page.h index 46fb197312a09d9..546ddd057af6069 100644 --- a/be/src/olap/rowset/segment_v2/rle_page.h +++ b/be/src/olap/rowset/segment_v2/rle_page.h @@ -98,12 +98,7 @@ class RlePageBuilder : public PageBuilder { } Slice finish() override { - _finished = true; - // here should Flush first and then encode the count header - // or it will lead to a bug if the header is less than 8 byte and the data is small - _rle_encoder->Flush(); - encode_fixed32_le(&_buf[0], _count); - return Slice(_buf.data(), _buf.size()); + return _finish(FINISH); } void reset() override { @@ -129,7 +124,26 @@ class RlePageBuilder : public PageBuilder { (void)ret; } + Slice finish_and_release() override { + return _finish(RELEASE); + } + private: + + Slice _finish(BuilderReturnType builderReturnType) { + _finished = true; + // here should Flush first and then encode the count header + // or it will lead to a bug if the header is less than 8 byte and the data is small + _rle_encoder->Flush(); + encode_fixed32_le(&_buf[0], _count); + if (builderReturnType == FINISH) { + return Slice(_buf.data(), _buf.size()); + } else { + return Slice(_buf.release(), _buf.size()); + } + } + + typedef typename TypeTraits::CppType CppType; enum { SIZE_OF_TYPE = TypeTraits::size