diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp index 8729bd0c590276..50874d0db5c72b 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp @@ -75,6 +75,23 @@ const int32_t MAX_LEAF_COUNT = 1024; const float MAXMBSortInHeap = 512.0 * 8; const int DIMS = 1; +bool InvertedIndexColumnWriter::check_support_inverted_index(const TabletColumn& column) { + // bellow types are not supported in inverted index for extracted columns + static std::set invalid_types = { + FieldType::OLAP_FIELD_TYPE_DOUBLE, + FieldType::OLAP_FIELD_TYPE_JSONB, + FieldType::OLAP_FIELD_TYPE_ARRAY, + FieldType::OLAP_FIELD_TYPE_FLOAT, + }; + if (column.is_extracted_column() && (invalid_types.contains(column.type()))) { + return false; + } + if (column.is_variant_type()) { + return false; + } + return true; +} + template class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { public: diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.h b/be/src/olap/rowset/segment_v2/inverted_index_writer.h index 63c1e219e649e8..da90752db09168 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_writer.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.h @@ -33,7 +33,6 @@ #include "io/fs/local_file_system.h" #include "olap/olap_common.h" #include "olap/options.h" -#include "olap/tablet_schema.h" namespace doris { class CollectionValue; @@ -41,6 +40,7 @@ class CollectionValue; class Field; class TabletIndex; +class TabletColumn; namespace segment_v2 { class InvertedIndexFileWriter; @@ -74,22 +74,7 @@ class InvertedIndexColumnWriter { // check if the column is valid for inverted index, some columns // are generated from variant, but not all of them are supported - static bool check_support_inverted_index(const TabletColumn& column) { - // bellow types are not supported in inverted index for extracted columns - static std::set invalid_types = { - FieldType::OLAP_FIELD_TYPE_DOUBLE, - FieldType::OLAP_FIELD_TYPE_JSONB, - FieldType::OLAP_FIELD_TYPE_ARRAY, - FieldType::OLAP_FIELD_TYPE_FLOAT, - }; - if (column.is_extracted_column() && (invalid_types.contains(column.type()))) { - return false; - } - if (column.is_variant_type()) { - return false; - } - return true; - } + static bool check_support_inverted_index(const TabletColumn& column); private: DISALLOW_COPY_AND_ASSIGN(InvertedIndexColumnWriter); diff --git a/be/src/olap/tablet_column_object_pool.cpp b/be/src/olap/tablet_column_object_pool.cpp new file mode 100644 index 00000000000000..6e07fb4e831e60 --- /dev/null +++ b/be/src/olap/tablet_column_object_pool.cpp @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/tablet_column_object_pool.h" + +#include +#include + +#include "olap/tablet_schema.h" + +namespace doris { + +bvar::Adder g_tablet_column_cache_count("tablet_column_cache_count"); +bvar::Adder g_tablet_column_cache_hit_count("tablet_column_cache_hit_count"); + +std::pair TabletColumnObjectPool::insert(const std::string& key) { + auto* lru_handle = lookup(key); + TabletColumnPtr tablet_column_ptr; + if (lru_handle) { + auto* value = (CacheValue*)LRUCachePolicy::value(lru_handle); + tablet_column_ptr = value->tablet_column; + VLOG_DEBUG << "reuse column "; + g_tablet_column_cache_hit_count << 1; + } else { + auto* value = new CacheValue; + tablet_column_ptr = std::make_shared(); + ColumnPB pb; + pb.ParseFromString(key); + tablet_column_ptr->init_from_pb(pb); + VLOG_DEBUG << "create column "; + value->tablet_column = tablet_column_ptr; + lru_handle = LRUCachePolicy::insert(key, value, 1, 0, CachePriority::NORMAL); + g_tablet_column_cache_count << 1; + } + DCHECK(lru_handle != nullptr); + return {lru_handle, tablet_column_ptr}; +} + +TabletColumnObjectPool::CacheValue::~CacheValue() { + g_tablet_column_cache_count << -1; +} + +} // namespace doris diff --git a/be/src/olap/tablet_column_object_pool.h b/be/src/olap/tablet_column_object_pool.h new file mode 100644 index 00000000000000..1eead6a25c9609 --- /dev/null +++ b/be/src/olap/tablet_column_object_pool.h @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "olap/tablet_fwd.h" +#include "olap/tablet_schema.h" +#include "runtime/exec_env.h" +#include "runtime/memory/lru_cache_policy.h" + +namespace doris { + +// TabletColumnObjectPool is a cache for TabletColumn objects. It is used to reduce memory consumption +// when there are a large number of identical TabletColumns in the cluster, which usually occurs +// when VARIANT type columns are modified and added, each Rowset has an individual TabletSchema. +// Excessive TabletSchemas can lead to significant memory overhead. Reusing memory for identical +// TabletColumns would greatly reduce this memory consumption. + +class TabletColumnObjectPool : public LRUCachePolicy { +public: + TabletColumnObjectPool(size_t capacity) + : LRUCachePolicy(CachePolicy::CacheType::TABLET_COLUMN_OBJECT_POOL, capacity, + LRUCacheType::NUMBER, config::tablet_schema_cache_recycle_interval) {} + + static TabletColumnObjectPool* create_global_column_cache(size_t capacity) { + auto* res = new TabletColumnObjectPool(capacity); + return res; + } + + static TabletColumnObjectPool* instance() { + return ExecEnv::GetInstance()->get_tablet_column_object_pool(); + } + + std::pair insert(const std::string& key); + +private: + class CacheValue : public LRUCacheValueBase { + public: + ~CacheValue() override; + TabletColumnPtr tablet_column; + }; +}; + +} // namespace doris diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 95c7bd5b773fe6..e264c40202924a 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -38,8 +38,10 @@ #include "exec/tablet_info.h" #include "olap/inverted_index_parser.h" #include "olap/olap_define.h" +#include "olap/tablet_column_object_pool.h" #include "olap/types.h" #include "olap/utils.h" +#include "runtime/memory/lru_cache_policy.h" #include "runtime/thread_context.h" #include "tablet_meta.h" #include "vec/aggregate_functions/aggregate_function_simple_factory.h" @@ -859,6 +861,7 @@ TabletSchema::TabletSchema() { TabletSchema::~TabletSchema() { g_total_tablet_schema_num << -1; + clear_column_cache_handlers(); } void TabletSchema::append_column(TabletColumn column, ColumnType col_type) { @@ -948,9 +951,18 @@ void TabletSchema::clear_columns() { _num_null_columns = 0; _num_key_columns = 0; _cols.clear(); + clear_column_cache_handlers(); } -void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns) { +void TabletSchema::clear_column_cache_handlers() { + for (auto* cache_handle : _column_cache_handlers) { + TabletColumnObjectPool::instance()->release(cache_handle); + } + _column_cache_handlers.clear(); +} + +void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns, + bool reuse_cache_column) { _keys_type = schema.keys_type(); _num_columns = 0; _num_variant_columns = 0; @@ -961,25 +973,34 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extrac _field_name_to_index.clear(); _field_id_to_index.clear(); _cluster_key_idxes.clear(); + clear_column_cache_handlers(); for (const auto& i : schema.cluster_key_idxes()) { _cluster_key_idxes.push_back(i); } for (auto& column_pb : schema.column()) { - TabletColumn column; - column.init_from_pb(column_pb); - if (ignore_extracted_columns && column.is_extracted_column()) { + TabletColumnPtr column; + if (reuse_cache_column) { + auto pair = TabletColumnObjectPool::instance()->insert( + deterministic_string_serialize(column_pb)); + column = pair.second; + _column_cache_handlers.push_back(pair.first); + } else { + column = std::make_shared(); + column->init_from_pb(column_pb); + } + if (ignore_extracted_columns && column->is_extracted_column()) { continue; } - if (column.is_key()) { + if (column->is_key()) { _num_key_columns++; } - if (column.is_nullable()) { + if (column->is_nullable()) { _num_null_columns++; } - if (column.is_variant_type()) { + if (column->is_variant_type()) { ++_num_variant_columns; } - _cols.emplace_back(std::make_shared(std::move(column))); + _cols.emplace_back(std::move(column)); _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns); _field_id_to_index[_cols.back()->unique_id()] = _num_columns; _num_columns++; @@ -1091,6 +1112,7 @@ void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version _sequence_col_idx = -1; _version_col_idx = -1; _cluster_key_idxes.clear(); + clear_column_cache_handlers(); for (const auto& i : ori_tablet_schema._cluster_key_idxes) { _cluster_key_idxes.push_back(i); } @@ -1563,13 +1585,4 @@ bool operator!=(const TabletSchema& a, const TabletSchema& b) { return !(a == b); } -std::string TabletSchema::deterministic_string_serialize(const TabletSchemaPB& schema_pb) { - std::string output; - google::protobuf::io::StringOutputStream string_output_stream(&output); - google::protobuf::io::CodedOutputStream output_stream(&string_output_stream); - output_stream.SetSerializationDeterministic(true); - schema_pb.SerializeToCodedStream(&output_stream); - return output; -} - } // namespace doris diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 1d1d6c9de79d24..e2f90e2716fff9 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -39,6 +39,7 @@ #include "olap/rowset/segment_v2/options.h" #include "runtime/define_primitive_type.h" #include "runtime/descriptors.h" +#include "runtime/memory/lru_cache_policy.h" #include "util/string_util.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/common/string_ref.h" @@ -296,10 +297,22 @@ class TabletSchema { TabletSchema(); virtual ~TabletSchema(); - void init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns = false); + // Init from pb + // ignore_extracted_columns: ignore the extracted columns from variant column + // reuse_cached_column: reuse the cached column in the schema if they are the same, to reduce memory usage + void init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns = false, + bool reuse_cached_column = false); // Notice: Use deterministic way to serialize protobuf, // since serialize Map in protobuf may could lead to un-deterministic by default - static std::string deterministic_string_serialize(const TabletSchemaPB& schema_pb); + template + static std::string deterministic_string_serialize(const PbType& pb) { + std::string output; + google::protobuf::io::StringOutputStream string_output_stream(&output); + google::protobuf::io::CodedOutputStream output_stream(&string_output_stream); + output_stream.SetSerializationDeterministic(true); + pb.SerializeToCodedStream(&output_stream); + return output; + } void to_schema_pb(TabletSchemaPB* tablet_meta_pb) const; void append_column(TabletColumn column, ColumnType col_type = ColumnType::NORMAL); void append_index(TabletIndex index); @@ -501,10 +514,13 @@ class TabletSchema { friend bool operator==(const TabletSchema& a, const TabletSchema& b); friend bool operator!=(const TabletSchema& a, const TabletSchema& b); + void clear_column_cache_handlers(); + KeysType _keys_type = DUP_KEYS; SortType _sort_type = SortType::LEXICAL; size_t _sort_col_num = 0; std::vector _cols; + std::vector _column_cache_handlers; std::vector _indexes; std::unordered_map _field_name_to_index; diff --git a/be/src/olap/tablet_schema_cache.cpp b/be/src/olap/tablet_schema_cache.cpp index e339c947bb97a4..fd238fa5affb3f 100644 --- a/be/src/olap/tablet_schema_cache.cpp +++ b/be/src/olap/tablet_schema_cache.cpp @@ -18,30 +18,45 @@ #include "olap/tablet_schema_cache.h" #include +#include +#include #include "bvar/bvar.h" #include "olap/tablet_schema.h" +#include "util/sha.h" bvar::Adder g_tablet_schema_cache_count("tablet_schema_cache_count"); bvar::Adder g_tablet_schema_cache_columns_count("tablet_schema_cache_columns_count"); +bvar::Adder g_tablet_schema_cache_hit_count("tablet_schema_cache_hit_count"); namespace doris { +// to reduce the memory consumption of the serialized TabletSchema as key. +// use sha256 to prevent from hash collision +static std::string get_key_signature(const std::string& origin) { + SHA256Digest digest; + digest.reset(origin.data(), origin.length()); + return std::string {digest.digest().data(), digest.digest().length()}; +} + std::pair TabletSchemaCache::insert(const std::string& key) { - auto* lru_handle = lookup(key); + std::string key_signature = get_key_signature(key); + auto* lru_handle = lookup(key_signature); TabletSchemaSPtr tablet_schema_ptr; if (lru_handle) { auto* value = (CacheValue*)LRUCachePolicy::value(lru_handle); tablet_schema_ptr = value->tablet_schema; + g_tablet_schema_cache_hit_count << 1; } else { auto* value = new CacheValue; tablet_schema_ptr = std::make_shared(); TabletSchemaPB pb; pb.ParseFromString(key); - tablet_schema_ptr->init_from_pb(pb); + // We should reuse the memory of the same TabletColumn object, set reuse_cached_column to true + tablet_schema_ptr->init_from_pb(pb, false, true); value->tablet_schema = tablet_schema_ptr; - lru_handle = LRUCachePolicy::insert(key, value, tablet_schema_ptr->num_columns(), 0, - CachePriority::NORMAL); + lru_handle = LRUCachePolicy::insert(key_signature, value, tablet_schema_ptr->num_columns(), + 0, CachePriority::NORMAL); g_tablet_schema_cache_count << 1; g_tablet_schema_cache_columns_count << tablet_schema_ptr->num_columns(); } diff --git a/be/src/pipeline/query_cache/query_cache.h b/be/src/pipeline/query_cache/query_cache.h index a905831b530578..827c516ad75f07 100644 --- a/be/src/pipeline/query_cache/query_cache.h +++ b/be/src/pipeline/query_cache/query_cache.h @@ -37,6 +37,7 @@ #include "runtime/memory/mem_tracker.h" #include "util/slice.h" #include "util/time.h" +#include "vec/core/block.h" namespace doris { diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h index 61cebad10b9e78..afd99a624f2c78 100644 --- a/be/src/runtime/exec_env.h +++ b/be/src/runtime/exec_env.h @@ -101,6 +101,7 @@ class FrontendServiceClient; class FileMetaCache; class GroupCommitMgr; class TabletSchemaCache; +class TabletColumnObjectPool; class UserFunctionCache; class SchemaCache; class StoragePageCache; @@ -272,6 +273,9 @@ class ExecEnv { void set_storage_engine(std::unique_ptr&& engine); void set_cache_manager(CacheManager* cm) { this->_cache_manager = cm; } void set_tablet_schema_cache(TabletSchemaCache* c) { this->_tablet_schema_cache = c; } + void set_tablet_column_object_pool(TabletColumnObjectPool* c) { + this->_tablet_column_object_pool = c; + } void set_storage_page_cache(StoragePageCache* c) { this->_storage_page_cache = c; } void set_segment_loader(SegmentLoader* sl) { this->_segment_loader = sl; } void set_routine_load_task_executor(RoutineLoadTaskExecutor* r) { @@ -297,6 +301,7 @@ class ExecEnv { std::map get_running_frontends(); TabletSchemaCache* get_tablet_schema_cache() { return _tablet_schema_cache; } + TabletColumnObjectPool* get_tablet_column_object_pool() { return _tablet_column_object_pool; } SchemaCache* schema_cache() { return _schema_cache; } StoragePageCache* get_storage_page_cache() { return _storage_page_cache; } SegmentLoader* segment_loader() { return _segment_loader; } @@ -434,6 +439,7 @@ class ExecEnv { // these redundancy header could introduce potential bug, at least, more header means slow compile. // So we choose to use raw pointer, please remember to delete these pointer in deconstructor. TabletSchemaCache* _tablet_schema_cache = nullptr; + TabletColumnObjectPool* _tablet_column_object_pool = nullptr; std::unique_ptr _storage_engine; SchemaCache* _schema_cache = nullptr; StoragePageCache* _storage_page_cache = nullptr; diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index c22198545406ea..062069044dcd37 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -53,6 +53,7 @@ #include "olap/schema_cache.h" #include "olap/segment_loader.h" #include "olap/storage_engine.h" +#include "olap/tablet_column_object_pool.h" #include "olap/tablet_schema_cache.h" #include "olap/wal/wal_manager.h" #include "pipeline/pipeline_tracing.h" @@ -337,6 +338,9 @@ Status ExecEnv::_init(const std::vector& store_paths, _tablet_schema_cache = TabletSchemaCache::create_global_schema_cache(config::tablet_schema_cache_capacity); + _tablet_column_object_pool = TabletColumnObjectPool::create_global_column_cache( + config::tablet_schema_cache_capacity); + // Storage engine doris::EngineOptions options; options.store_paths = store_paths; diff --git a/be/src/runtime/memory/cache_policy.h b/be/src/runtime/memory/cache_policy.h index 5241efb9c2924a..e7e1c73e7cbb41 100644 --- a/be/src/runtime/memory/cache_policy.h +++ b/be/src/runtime/memory/cache_policy.h @@ -48,7 +48,8 @@ class CachePolicy { CLOUD_TXN_DELETE_BITMAP_CACHE = 17, NONE = 18, // not be used FOR_UT_CACHE_NUMBER = 19, - QUERY_CACHE = 20 + QUERY_CACHE = 20, + TABLET_COLUMN_OBJECT_POOL = 21, }; static std::string type_string(CacheType type) { @@ -92,7 +93,9 @@ class CachePolicy { case CacheType::FOR_UT_CACHE_NUMBER: return "ForUTCacheNumber"; case CacheType::QUERY_CACHE: - return "QUERY_CACHE"; + return "QueryCache"; + case CacheType::TABLET_COLUMN_OBJECT_POOL: + return "TabletColumnObjectPool"; default: LOG(FATAL) << "not match type of cache policy :" << static_cast(type); } @@ -119,7 +122,8 @@ class CachePolicy { {"CreateTabletRRIdxCache", CacheType::CREATE_TABLET_RR_IDX_CACHE}, {"CloudTabletCache", CacheType::CLOUD_TABLET_CACHE}, {"CloudTxnDeleteBitmapCache", CacheType::CLOUD_TXN_DELETE_BITMAP_CACHE}, - {"ForUTCacheNumber", CacheType::FOR_UT_CACHE_NUMBER}}; + {"ForUTCacheNumber", CacheType::FOR_UT_CACHE_NUMBER}, + {"TabletColumnObjectPool", CacheType::TABLET_COLUMN_OBJECT_POOL}}; static CacheType string_to_type(std::string type) { if (StringToType.contains(type)) { diff --git a/be/src/util/block_compression.cpp b/be/src/util/block_compression.cpp index d13c0c091b9ced..d1788b0948a6f2 100644 --- a/be/src/util/block_compression.cpp +++ b/be/src/util/block_compression.cpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include "common/config.h" diff --git a/be/test/testutil/run_all_tests.cpp b/be/test/testutil/run_all_tests.cpp index a0fc174aeda230..70af1d2e34f2df 100644 --- a/be/test/testutil/run_all_tests.cpp +++ b/be/test/testutil/run_all_tests.cpp @@ -31,6 +31,7 @@ #include "olap/page_cache.h" #include "olap/segment_loader.h" #include "olap/storage_engine.h" +#include "olap/tablet_column_object_pool.h" #include "olap/tablet_schema_cache.h" #include "runtime/exec_env.h" #include "runtime/memory/cache_manager.h" @@ -64,6 +65,9 @@ int main(int argc, char** argv) { doris::ExecEnv::GetInstance()->set_tablet_schema_cache( doris::TabletSchemaCache::create_global_schema_cache( doris::config::tablet_schema_cache_capacity)); + doris::ExecEnv::GetInstance()->set_tablet_column_object_pool( + doris::TabletColumnObjectPool::create_global_column_cache( + doris::config::tablet_schema_cache_capacity)); LOG(INFO) << "init config " << st; doris::Status s = doris::config::set_config("enable_stacktrace", "false"); if (!s.ok()) {