From 8e31fa566ec027edb8e610e6f84750ed584cd1f8 Mon Sep 17 00:00:00 2001 From: eldenmoon <15605149486@163.com> Date: Thu, 24 Oct 2024 23:15:37 +0800 Subject: [PATCH] reduce column mem --- be/src/olap/tablet_column_cache.cpp | 55 +++++++++++++++++++++++++ be/src/olap/tablet_column_cache.h | 60 ++++++++++++++++++++++++++++ be/src/olap/tablet_schema.cpp | 33 ++++++++------- be/src/olap/tablet_schema.h | 16 +++++++- be/src/olap/tablet_schema_cache.cpp | 2 +- be/src/runtime/exec_env.h | 3 ++ be/src/runtime/exec_env_init.cpp | 4 ++ be/src/runtime/memory/cache_policy.h | 8 +++- 8 files changed, 159 insertions(+), 22 deletions(-) create mode 100644 be/src/olap/tablet_column_cache.cpp create mode 100644 be/src/olap/tablet_column_cache.h diff --git a/be/src/olap/tablet_column_cache.cpp b/be/src/olap/tablet_column_cache.cpp new file mode 100644 index 000000000000000..db40c727805cb78 --- /dev/null +++ b/be/src/olap/tablet_column_cache.cpp @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/tablet_column_cache.h" + +#include +#include + +#include "olap/tablet_schema.h" + +namespace doris { + +TabletColumnPtr TabletColumnCache::insert(const std::string& key) { + auto* lru_handle = lookup(key); + TabletColumnPtr tablet_column_ptr; + if (lru_handle) { + auto* value = (CacheValue*)LRUCachePolicy::value(lru_handle); + tablet_column_ptr = value->tablet_column; + VLOG_DEBUG << "reuse column "; + } else { + auto* value = new CacheValue; + tablet_column_ptr = std::make_shared(); + ColumnPB pb; + pb.ParseFromString(key); + tablet_column_ptr->init_from_pb(pb); + VLOG_DEBUG << "create column "; + value->tablet_column = tablet_column_ptr; + lru_handle = + LRUCachePolicy::insert(key, value, sizeof(TabletColumn), 0, CachePriority::NORMAL); + } + DCHECK(lru_handle != nullptr); + return tablet_column_ptr; +} + +void TabletColumnCache::release(Cache::Handle* lru_handle) { + LRUCachePolicy::release(lru_handle); +} + +TabletColumnCache::CacheValue::~CacheValue() = default; + +} // namespace doris diff --git a/be/src/olap/tablet_column_cache.h b/be/src/olap/tablet_column_cache.h new file mode 100644 index 000000000000000..658d2ddb348bfaa --- /dev/null +++ b/be/src/olap/tablet_column_cache.h @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "olap/tablet_fwd.h" +#include "runtime/exec_env.h" +#include "runtime/memory/lru_cache_policy.h" + +namespace doris { + +// TabletColumnCache is a cache for TabletColumn objects. It is used to reduce memory consumption +// when there are a large number of identical TabletColumns in the cluster, which usually occurs +// when VARIANT type columns are modified and added, each Rowset has an individual TabletSchema. +// Excessive TabletSchemas can lead to significant memory overhead. Reusing memory for identical +// TabletColumns would greatly reduce this memory consumption. + +class TabletColumnCache : public LRUCachePolicy { +public: + TabletColumnCache(size_t capacity) + : LRUCachePolicy(CachePolicy::CacheType::TABLET_COLUMN_CACHE, capacity, + LRUCacheType::NUMBER, config::tablet_schema_cache_recycle_interval) {} + + static TabletColumnCache* create_global_column_cache(size_t capacity) { + auto* res = new TabletColumnCache(capacity); + return res; + } + + static TabletColumnCache* instance() { + return ExecEnv::GetInstance()->get_tablet_column_cache(); + } + + TabletColumnPtr insert(const std::string& key); + + void release(Cache::Handle*); + +private: + class CacheValue : public LRUCacheValueBase { + public: + ~CacheValue() override; + + TabletColumnPtr tablet_column; + }; +}; + +} // namespace doris diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index c88a23a0c360cf9..78325d3b34d3bf6 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -38,6 +38,7 @@ #include "exec/tablet_info.h" #include "olap/inverted_index_parser.h" #include "olap/olap_define.h" +#include "olap/tablet_column_cache.h" #include "olap/types.h" #include "olap/utils.h" #include "runtime/thread_context.h" @@ -942,7 +943,8 @@ void TabletSchema::clear_columns() { _cols.clear(); } -void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns) { +void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns, + bool reuse_cache_column) { _keys_type = schema.keys_type(); _num_columns = 0; _num_variant_columns = 0; @@ -957,21 +959,27 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extrac _cluster_key_idxes.push_back(i); } for (auto& column_pb : schema.column()) { - TabletColumn column; - column.init_from_pb(column_pb); - if (ignore_extracted_columns && column.is_extracted_column()) { + TabletColumnPtr column; + if (reuse_cache_column) { + column = TabletColumnCache::instance()->insert( + deterministic_string_serialize(column_pb)); + } else { + column = std::make_shared(); + column->init_from_pb(column_pb); + } + if (ignore_extracted_columns && column->is_extracted_column()) { continue; } - if (column.is_key()) { + if (column->is_key()) { _num_key_columns++; } - if (column.is_nullable()) { + if (column->is_nullable()) { _num_null_columns++; } - if (column.is_variant_type()) { + if (column->is_variant_type()) { ++_num_variant_columns; } - _cols.emplace_back(std::make_shared(std::move(column))); + _cols.emplace_back(std::move(column)); _vl_field_mem_size += sizeof(StringRef) + sizeof(char) * _cols.back()->name().size() + sizeof(size_t); _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns); @@ -1565,13 +1573,4 @@ bool operator!=(const TabletSchema& a, const TabletSchema& b) { return !(a == b); } -std::string TabletSchema::deterministic_string_serialize(const TabletSchemaPB& schema_pb) { - std::string output; - google::protobuf::io::StringOutputStream string_output_stream(&output); - google::protobuf::io::CodedOutputStream output_stream(&string_output_stream); - output_stream.SetSerializationDeterministic(true); - schema_pb.SerializeToCodedStream(&output_stream); - return output; -} - } // namespace doris diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index ebe2c63c7f30d20..1db031fe42153b6 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -298,10 +298,22 @@ class TabletSchema : public MetadataAdder { TabletSchema(); virtual ~TabletSchema(); - void init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns = false); + // Init from pb + // ignore_extracted_columns: ignore the extracted columns from variant column + // reuse_cached_column: reuse the cached column in the schema if they are the same, to reduce memory usage + void init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns = false, + bool reuse_cached_column = false); // Notice: Use deterministic way to serialize protobuf, // since serialize Map in protobuf may could lead to un-deterministic by default - static std::string deterministic_string_serialize(const TabletSchemaPB& schema_pb); + template + static std::string deterministic_string_serialize(const PbType& pb) { + std::string output; + google::protobuf::io::StringOutputStream string_output_stream(&output); + google::protobuf::io::CodedOutputStream output_stream(&string_output_stream); + output_stream.SetSerializationDeterministic(true); + pb.SerializeToCodedStream(&output_stream); + return output; + } void to_schema_pb(TabletSchemaPB* tablet_meta_pb) const; void append_column(TabletColumn column, ColumnType col_type = ColumnType::NORMAL); void append_index(TabletIndex index); diff --git a/be/src/olap/tablet_schema_cache.cpp b/be/src/olap/tablet_schema_cache.cpp index e339c947bb97a4e..f48a53721275597 100644 --- a/be/src/olap/tablet_schema_cache.cpp +++ b/be/src/olap/tablet_schema_cache.cpp @@ -38,7 +38,7 @@ std::pair TabletSchemaCache::insert(const std: tablet_schema_ptr = std::make_shared(); TabletSchemaPB pb; pb.ParseFromString(key); - tablet_schema_ptr->init_from_pb(pb); + tablet_schema_ptr->init_from_pb(pb, false, true); value->tablet_schema = tablet_schema_ptr; lru_handle = LRUCachePolicy::insert(key, value, tablet_schema_ptr->num_columns(), 0, CachePriority::NORMAL); diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h index fdbd6507d134720..e2bf77983e944fe 100644 --- a/be/src/runtime/exec_env.h +++ b/be/src/runtime/exec_env.h @@ -101,6 +101,7 @@ class FrontendServiceClient; class FileMetaCache; class GroupCommitMgr; class TabletSchemaCache; +class TabletColumnCache; class UserFunctionCache; class SchemaCache; class StoragePageCache; @@ -300,6 +301,7 @@ class ExecEnv { std::map get_running_frontends(); TabletSchemaCache* get_tablet_schema_cache() { return _tablet_schema_cache; } + TabletColumnCache* get_tablet_column_cache() { return _tablet_column_cache; } SchemaCache* schema_cache() { return _schema_cache; } StoragePageCache* get_storage_page_cache() { return _storage_page_cache; } SegmentLoader* segment_loader() { return _segment_loader; } @@ -439,6 +441,7 @@ class ExecEnv { // these redundancy header could introduce potential bug, at least, more header means slow compile. // So we choose to use raw pointer, please remember to delete these pointer in deconstructor. TabletSchemaCache* _tablet_schema_cache = nullptr; + TabletColumnCache* _tablet_column_cache = nullptr; std::unique_ptr _storage_engine; SchemaCache* _schema_cache = nullptr; StoragePageCache* _storage_page_cache = nullptr; diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index ff6205bf55e5d03..a4e779a4510c021 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -53,6 +53,7 @@ #include "olap/schema_cache.h" #include "olap/segment_loader.h" #include "olap/storage_engine.h" +#include "olap/tablet_column_cache.h" #include "olap/tablet_schema_cache.h" #include "olap/wal/wal_manager.h" #include "pipeline/pipeline_tracing.h" @@ -339,6 +340,9 @@ Status ExecEnv::_init(const std::vector& store_paths, _tablet_schema_cache = TabletSchemaCache::create_global_schema_cache(config::tablet_schema_cache_capacity); + _tablet_column_cache = + TabletColumnCache::create_global_column_cache(config::tablet_schema_cache_capacity); + // Storage engine doris::EngineOptions options; options.store_paths = store_paths; diff --git a/be/src/runtime/memory/cache_policy.h b/be/src/runtime/memory/cache_policy.h index 666d32bdb56e4d8..a88d22e5c5e6636 100644 --- a/be/src/runtime/memory/cache_policy.h +++ b/be/src/runtime/memory/cache_policy.h @@ -48,7 +48,8 @@ class CachePolicy { CLOUD_TXN_DELETE_BITMAP_CACHE = 17, NONE = 18, // not be used FOR_UT_CACHE_NUMBER = 19, - QUERY_CACHE = 20 + QUERY_CACHE = 20, + TABLET_COLUMN_CACHE = 21, }; static std::string type_string(CacheType type) { @@ -93,6 +94,8 @@ class CachePolicy { return "ForUTCacheNumber"; case CacheType::QUERY_CACHE: return "QueryCache"; + case CacheType::TABLET_COLUMN_CACHE: + return "TabletColumnCache"; default: LOG(FATAL) << "not match type of cache policy :" << static_cast(type); } @@ -119,7 +122,8 @@ class CachePolicy { {"CreateTabletRRIdxCache", CacheType::CREATE_TABLET_RR_IDX_CACHE}, {"CloudTabletCache", CacheType::CLOUD_TABLET_CACHE}, {"CloudTxnDeleteBitmapCache", CacheType::CLOUD_TXN_DELETE_BITMAP_CACHE}, - {"ForUTCacheNumber", CacheType::FOR_UT_CACHE_NUMBER}}; + {"ForUTCacheNumber", CacheType::FOR_UT_CACHE_NUMBER}, + {"TabletColumnCache", CacheType::TABLET_COLUMN_CACHE}}; static CacheType string_to_type(std::string type) { if (StringToType.contains(type)) {