From 6c8d54a54615960d1f4f2e2524b9ab40ff191488 Mon Sep 17 00:00:00 2001 From: gengliqi Date: Mon, 6 Feb 2023 16:39:00 +0800 Subject: [PATCH 1/3] reduce memory allocation for filter when result_size_hint equals -1 Signed-off-by: gengliqi --- dbms/src/Columns/ColumnDecimal.cpp | 6 ++++-- dbms/src/Columns/ColumnFixedString.cpp | 7 +++++-- dbms/src/Columns/ColumnVector.cpp | 7 +++++-- dbms/src/Columns/ColumnsCommon.cpp | 15 ++++++++------- dbms/src/Columns/IColumn.h | 2 +- 5 files changed, 23 insertions(+), 14 deletions(-) diff --git a/dbms/src/Columns/ColumnDecimal.cpp b/dbms/src/Columns/ColumnDecimal.cpp index 8af6cd9b6e7..25f85ddb986 100644 --- a/dbms/src/Columns/ColumnDecimal.cpp +++ b/dbms/src/Columns/ColumnDecimal.cpp @@ -305,8 +305,10 @@ ColumnPtr ColumnDecimal::filter(const IColumn::Filter & filt, ssize_t result_ auto res = this->create(0, scale); Container & res_data = res->getData(); - if (result_size_hint) - res_data.reserve(result_size_hint > 0 ? result_size_hint : size); + if (result_size_hint < 0) + res_data.reserve(countBytesInFilter(filt)); + else if (result_size_hint > 0) + res_data.reserve(result_size_hint); const UInt8 * filt_pos = filt.data(); const UInt8 * filt_end = filt_pos + size; diff --git a/dbms/src/Columns/ColumnFixedString.cpp b/dbms/src/Columns/ColumnFixedString.cpp index ef4a7b287c4..a28177bc0b6 100644 --- a/dbms/src/Columns/ColumnFixedString.cpp +++ b/dbms/src/Columns/ColumnFixedString.cpp @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include @@ -204,8 +205,10 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result auto res = ColumnFixedString::create(n); - if (result_size_hint) - res->chars.reserve(result_size_hint > 0 ? result_size_hint * n : chars.size()); + if (result_size_hint < 0) + res->chars.reserve(countBytesInFilter(filt) * n); + else if (result_size_hint > 0) + res->chars.reserve(result_size_hint * n); const UInt8 * filt_pos = &filt[0]; const UInt8 * filt_end = filt_pos + col_size; diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index 3ea8af02309..03fd5cab5c8 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include @@ -212,8 +213,10 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s auto res = this->create(); Container & res_data = res->getData(); - if (result_size_hint) - res_data.reserve(result_size_hint > 0 ? result_size_hint : size); + if (result_size_hint < 0) + res_data.reserve(countBytesInFilter(filt)); + else if (result_size_hint > 0) + res_data.reserve(result_size_hint); const UInt8 * filt_pos = &filt[0]; const UInt8 * filt_end = filt_pos + size; diff --git a/dbms/src/Columns/ColumnsCommon.cpp b/dbms/src/Columns/ColumnsCommon.cpp index da468c86505..5dd2f466864 100644 --- a/dbms/src/Columns/ColumnsCommon.cpp +++ b/dbms/src/Columns/ColumnsCommon.cpp @@ -122,9 +122,9 @@ struct ResultOffsetsBuilder : res_offsets(*res_offsets_) {} - void reserve(ssize_t result_size_hint, size_t src_size) + void reserve(size_t result_size_hint) { - res_offsets.reserve(result_size_hint > 0 ? result_size_hint : src_size); + res_offsets.reserve(result_size_hint); } void insertOne(size_t array_size) @@ -165,7 +165,7 @@ struct ResultOffsetsBuilder struct NoResultOffsetsBuilder { explicit NoResultOffsetsBuilder(IColumn::Offsets *) {} - void reserve(ssize_t, size_t) {} + void reserve(size_t) {} void insertOne(size_t) {} template @@ -196,11 +196,12 @@ void filterArraysImplGeneric( if (result_size_hint) { - result_offsets_builder.reserve(result_size_hint, size); - if (result_size_hint < 0) - res_elems.reserve(src_elems.size()); - else if (result_size_hint < 1000000000 && src_elems.size() < 1000000000) /// Avoid overflow. + result_size_hint = countBytesInFilter(filt); + + result_offsets_builder.reserve(result_size_hint); + + if (result_size_hint < 1000000000 && src_elems.size() < 1000000000) /// Avoid overflow. res_elems.reserve((result_size_hint * src_elems.size() + size - 1) / size); } diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index ad93ce7ae73..ecc0a3f986e 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -216,7 +216,7 @@ class IColumn : public COWPtr * Is used in WHERE and HAVING operations. * If result_size_hint > 0, then makes advance reserve(result_size_hint) for the result column; * if 0, then don't makes reserve(), - * otherwise (i.e. < 0), makes reserve() using size of source column. + * otherwise (i.e. < 0), makes reserve() using size of column after being filtered. */ using Filter = PaddedPODArray; virtual Ptr filter(const Filter & filt, ssize_t result_size_hint) const = 0; From 5f99cbbab00e7b422d761386897ed1b45ac0f221 Mon Sep 17 00:00:00 2001 From: gengliqi Date: Mon, 6 Feb 2023 16:58:50 +0800 Subject: [PATCH 2/3] update Signed-off-by: gengliqi --- dbms/src/Columns/ColumnAggregateFunction.cpp | 7 ++++++- dbms/src/Columns/ColumnArray.h | 1 + dbms/src/Columns/ColumnDecimal.cpp | 8 +++++--- dbms/src/Columns/ColumnFixedString.cpp | 8 +++++--- dbms/src/Columns/ColumnVector.cpp | 8 +++++--- 5 files changed, 22 insertions(+), 10 deletions(-) diff --git a/dbms/src/Columns/ColumnAggregateFunction.cpp b/dbms/src/Columns/ColumnAggregateFunction.cpp index 8702a2b241a..941d8988375 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.cpp +++ b/dbms/src/Columns/ColumnAggregateFunction.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -143,7 +144,11 @@ ColumnPtr ColumnAggregateFunction::filter(const Filter & filter, ssize_t result_ auto & res_data = res->getData(); if (result_size_hint) - res_data.reserve(result_size_hint > 0 ? result_size_hint : size); + { + if (result_size_hint < 0) + result_size_hint = countBytesInFilter(filter); + res_data.reserve(result_size_hint); + } for (size_t i = 0; i < size; ++i) if (filter[i]) diff --git a/dbms/src/Columns/ColumnArray.h b/dbms/src/Columns/ColumnArray.h index 6493dbecab5..5b10e02272e 100644 --- a/dbms/src/Columns/ColumnArray.h +++ b/dbms/src/Columns/ColumnArray.h @@ -87,6 +87,7 @@ class ColumnArray final : public COWPtrHelper void insertFrom(const IColumn & src_, size_t n) override; void insertDefault() override; void popBack(size_t n) override; + /// TODO: If result_size_hint < 0, makes reserve() using size of column after being filtered, not source column. ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; diff --git a/dbms/src/Columns/ColumnDecimal.cpp b/dbms/src/Columns/ColumnDecimal.cpp index 25f85ddb986..48f263f5399 100644 --- a/dbms/src/Columns/ColumnDecimal.cpp +++ b/dbms/src/Columns/ColumnDecimal.cpp @@ -305,10 +305,12 @@ ColumnPtr ColumnDecimal::filter(const IColumn::Filter & filt, ssize_t result_ auto res = this->create(0, scale); Container & res_data = res->getData(); - if (result_size_hint < 0) - res_data.reserve(countBytesInFilter(filt)); - else if (result_size_hint > 0) + if (result_size_hint) + { + if (result_size_hint < 0) + result_size_hint = countBytesInFilter(filt); res_data.reserve(result_size_hint); + } const UInt8 * filt_pos = filt.data(); const UInt8 * filt_end = filt_pos + size; diff --git a/dbms/src/Columns/ColumnFixedString.cpp b/dbms/src/Columns/ColumnFixedString.cpp index a28177bc0b6..a07d3f37d1d 100644 --- a/dbms/src/Columns/ColumnFixedString.cpp +++ b/dbms/src/Columns/ColumnFixedString.cpp @@ -205,10 +205,12 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result auto res = ColumnFixedString::create(n); - if (result_size_hint < 0) - res->chars.reserve(countBytesInFilter(filt) * n); - else if (result_size_hint > 0) + if (result_size_hint) + { + if (result_size_hint < 0) + result_size_hint = countBytesInFilter(filt); res->chars.reserve(result_size_hint * n); + } const UInt8 * filt_pos = &filt[0]; const UInt8 * filt_end = filt_pos + col_size; diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index 03fd5cab5c8..90c4710c170 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -213,10 +213,12 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s auto res = this->create(); Container & res_data = res->getData(); - if (result_size_hint < 0) - res_data.reserve(countBytesInFilter(filt)); - else if (result_size_hint > 0) + if (result_size_hint) + { + if (result_size_hint < 0) + result_size_hint = countBytesInFilter(filt); res_data.reserve(result_size_hint); + } const UInt8 * filt_pos = &filt[0]; const UInt8 * filt_end = filt_pos + size; From a1b517b2c0fc0795b1238455c9637c12f3840bc8 Mon Sep 17 00:00:00 2001 From: gengliqi Date: Mon, 6 Feb 2023 17:04:17 +0800 Subject: [PATCH 3/3] update comments Signed-off-by: gengliqi --- dbms/src/Columns/ColumnArray.h | 2 +- dbms/src/Columns/IColumn.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Columns/ColumnArray.h b/dbms/src/Columns/ColumnArray.h index 5b10e02272e..43bb55c73d6 100644 --- a/dbms/src/Columns/ColumnArray.h +++ b/dbms/src/Columns/ColumnArray.h @@ -87,7 +87,7 @@ class ColumnArray final : public COWPtrHelper void insertFrom(const IColumn & src_, size_t n) override; void insertDefault() override; void popBack(size_t n) override; - /// TODO: If result_size_hint < 0, makes reserve() using size of column after being filtered, not source column. + /// TODO: If result_size_hint < 0, makes reserve() using size of filtered column, not source column to avoid some OOM issues. ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index ecc0a3f986e..410f4641e60 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -216,7 +216,7 @@ class IColumn : public COWPtr * Is used in WHERE and HAVING operations. * If result_size_hint > 0, then makes advance reserve(result_size_hint) for the result column; * if 0, then don't makes reserve(), - * otherwise (i.e. < 0), makes reserve() using size of column after being filtered. + * otherwise (i.e. < 0), makes reserve() using size of filtered column. */ using Filter = PaddedPODArray; virtual Ptr filter(const Filter & filt, ssize_t result_size_hint) const = 0;