Skip to content

Commit

Permalink
rename to TabletColumnObjectPool and reduce memory for tablet schema …
Browse files Browse the repository at this point in the history
…cache's LRU cache
  • Loading branch information
eldenmoon committed Oct 31, 2024
1 parent a4fb4ef commit 0761331
Show file tree
Hide file tree
Showing 8 changed files with 56 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

#include "olap/tablet_column_cache.h"
#include "olap/tablet_column_object_pool.h"

#include <gen_cpp/AgentService_types.h>
#include <gen_cpp/olap_file.pb.h>
Expand All @@ -24,7 +24,7 @@

namespace doris {

TabletColumnPtr TabletColumnCache::insert(const std::string& key) {
TabletColumnPtr TabletColumnObjectPool::insert(const std::string& key) {
auto* lru_handle = lookup(key);
TabletColumnPtr tablet_column_ptr;
if (lru_handle) {
Expand All @@ -46,10 +46,4 @@ TabletColumnPtr TabletColumnCache::insert(const std::string& key) {
return tablet_column_ptr;
}

void TabletColumnCache::release(Cache::Handle* lru_handle) {
LRUCachePolicy::release(lru_handle);
}

TabletColumnCache::CacheValue::~CacheValue() = default;

} // namespace doris
Original file line number Diff line number Diff line change
Expand Up @@ -23,36 +23,32 @@

namespace doris {

// TabletColumnCache is a cache for TabletColumn objects. It is used to reduce memory consumption
// TabletColumnObjectPool is a cache for TabletColumn objects. It is used to reduce memory consumption
// when there are a large number of identical TabletColumns in the cluster. This usually occurs
// when VARIANT type columns are modified and added, since each Rowset has an individual TabletSchema.
// Excessive TabletSchemas can lead to significant memory overhead. Reusing memory for identical
// TabletColumns would greatly reduce this memory consumption.

class TabletColumnCache : public LRUCachePolicy {
class TabletColumnObjectPool : public LRUCachePolicy {
public:
TabletColumnCache(size_t capacity)
: LRUCachePolicy(CachePolicy::CacheType::TABLET_COLUMN_CACHE, capacity,
TabletColumnObjectPool(size_t capacity)
: LRUCachePolicy(CachePolicy::CacheType::TABLET_COLUMN_OBJECT_POOL, capacity,
LRUCacheType::NUMBER, config::tablet_schema_cache_recycle_interval) {}

static TabletColumnCache* create_global_column_cache(size_t capacity) {
auto* res = new TabletColumnCache(capacity);
static TabletColumnObjectPool* create_global_column_cache(size_t capacity) {
auto* res = new TabletColumnObjectPool(capacity);
return res;
}

static TabletColumnCache* instance() {
return ExecEnv::GetInstance()->get_tablet_column_cache();
static TabletColumnObjectPool* instance() {
return ExecEnv::GetInstance()->get_tablet_column_object_pool();
}

TabletColumnPtr insert(const std::string& key);

void release(Cache::Handle*);

private:
class CacheValue : public LRUCacheValueBase {
public:
~CacheValue() override;

TabletColumnPtr tablet_column;
};
};
Expand Down
4 changes: 2 additions & 2 deletions be/src/olap/tablet_schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
#include "exec/tablet_info.h"
#include "olap/inverted_index_parser.h"
#include "olap/olap_define.h"
#include "olap/tablet_column_cache.h"
#include "olap/tablet_column_object_pool.h"
#include "olap/types.h"
#include "olap/utils.h"
#include "runtime/thread_context.h"
Expand Down Expand Up @@ -961,7 +961,7 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extrac
for (auto& column_pb : schema.column()) {
TabletColumnPtr column;
if (reuse_cache_column) {
column = TabletColumnCache::instance()->insert(
column = TabletColumnObjectPool::instance()->insert(
deterministic_string_serialize(column_pb));
} else {
column = std::make_shared<TabletColumn>();
Expand Down
31 changes: 28 additions & 3 deletions be/src/olap/tablet_schema_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,46 @@
#include "olap/tablet_schema_cache.h"

#include <gen_cpp/olap_file.pb.h>
#include <glog/logging.h>

#include <boost/uuid/name_generator_md5.hpp>
#include <boost/uuid/uuid.hpp>
#include <boost/uuid/uuid_generators.hpp>
#include <boost/uuid/uuid_io.hpp>

#include "bvar/bvar.h"
#include "olap/tablet_schema.h"

bvar::Adder<int64_t> g_tablet_schema_cache_count("tablet_schema_cache_count");
bvar::Adder<int64_t> g_tablet_schema_cache_columns_count("tablet_schema_cache_columns_count");
bvar::Adder<int64_t> g_tablet_schema_cache_hit_count("tablet_schema_cache_hit_count");

namespace doris {

// Get the key signature of the TabletSchemaPB, which is used as the key of LRUCache,
// to reduce the memory consumption of the serialized TabletSchema as key.
static std::string get_key_signature(const std::string& origin) {
// Use UUID namespace DNS to manually create
const static boost::uuids::uuid namespace_dns =
boost::uuids::string_generator()("6ba7b810-9dad-11d1-80b4-00c04fd430c8");

// base on MD5 hash algorithm to generate UUID
boost::uuids::name_generator_md5 gen(namespace_dns);
boost::uuids::uuid uuid = gen(origin);

// return UUID as signature.
return boost::uuids::to_string(uuid);
}

std::pair<Cache::Handle*, TabletSchemaSPtr> TabletSchemaCache::insert(const std::string& key) {
auto* lru_handle = lookup(key);
std::string key_signature = get_key_signature(key);
auto* lru_handle = lookup(key_signature);
TabletSchemaSPtr tablet_schema_ptr;
if (lru_handle) {
auto* value = (CacheValue*)LRUCachePolicy::value(lru_handle);
tablet_schema_ptr = value->tablet_schema;
LOG_IF(FATAL, tablet_schema_ptr->to_key() != key) << "key signature not match";
g_tablet_schema_cache_hit_count << 1;
} else {
auto* value = new CacheValue;
tablet_schema_ptr = std::make_shared<TabletSchema>();
Expand All @@ -41,8 +66,8 @@ std::pair<Cache::Handle*, TabletSchemaSPtr> TabletSchemaCache::insert(const std:
// We should reuse the memory of the same TabletColumn object, set reuse_cached_column to true
tablet_schema_ptr->init_from_pb(pb, false, true);
value->tablet_schema = tablet_schema_ptr;
lru_handle = LRUCachePolicy::insert(key, value, tablet_schema_ptr->num_columns(), 0,
CachePriority::NORMAL);
lru_handle = LRUCachePolicy::insert(key_signature, value, tablet_schema_ptr->num_columns(),
0, CachePriority::NORMAL);
g_tablet_schema_cache_count << 1;
g_tablet_schema_cache_columns_count << tablet_schema_ptr->num_columns();
}
Expand Down
10 changes: 6 additions & 4 deletions be/src/runtime/exec_env.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ class FrontendServiceClient;
class FileMetaCache;
class GroupCommitMgr;
class TabletSchemaCache;
class TabletColumnCache;
class TabletColumnObjectPool;
class UserFunctionCache;
class SchemaCache;
class StoragePageCache;
Expand Down Expand Up @@ -276,7 +276,9 @@ class ExecEnv {
void set_cache_manager(CacheManager* cm) { this->_cache_manager = cm; }
void set_process_profile(ProcessProfile* pp) { this->_process_profile = pp; }
void set_tablet_schema_cache(TabletSchemaCache* c) { this->_tablet_schema_cache = c; }
void set_tablet_column_cache(TabletColumnCache* c) { this->_tablet_column_cache = c; }
void set_tablet_column_object_pool(TabletColumnObjectPool* c) {
this->_tablet_column_object_pool = c;
}
void set_storage_page_cache(StoragePageCache* c) { this->_storage_page_cache = c; }
void set_segment_loader(SegmentLoader* sl) { this->_segment_loader = sl; }
void set_routine_load_task_executor(RoutineLoadTaskExecutor* r) {
Expand All @@ -302,7 +304,7 @@ class ExecEnv {
std::map<TNetworkAddress, FrontendInfo> get_running_frontends();

TabletSchemaCache* get_tablet_schema_cache() { return _tablet_schema_cache; }
TabletColumnCache* get_tablet_column_cache() { return _tablet_column_cache; }
TabletColumnObjectPool* get_tablet_column_object_pool() { return _tablet_column_object_pool; }
SchemaCache* schema_cache() { return _schema_cache; }
StoragePageCache* get_storage_page_cache() { return _storage_page_cache; }
SegmentLoader* segment_loader() { return _segment_loader; }
Expand Down Expand Up @@ -442,7 +444,7 @@ class ExecEnv {
// these redundant headers could introduce potential bugs; at the very least, more headers mean slower compilation.
// So we choose to use raw pointers; please remember to delete these pointers in the destructor.
TabletSchemaCache* _tablet_schema_cache = nullptr;
TabletColumnCache* _tablet_column_cache = nullptr;
TabletColumnObjectPool* _tablet_column_object_pool = nullptr;
std::unique_ptr<BaseStorageEngine> _storage_engine;
SchemaCache* _schema_cache = nullptr;
StoragePageCache* _storage_page_cache = nullptr;
Expand Down
6 changes: 3 additions & 3 deletions be/src/runtime/exec_env_init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
#include "olap/schema_cache.h"
#include "olap/segment_loader.h"
#include "olap/storage_engine.h"
#include "olap/tablet_column_cache.h"
#include "olap/tablet_column_object_pool.h"
#include "olap/tablet_schema_cache.h"
#include "olap/wal/wal_manager.h"
#include "pipeline/pipeline_tracing.h"
Expand Down Expand Up @@ -340,8 +340,8 @@ Status ExecEnv::_init(const std::vector<StorePath>& store_paths,
_tablet_schema_cache =
TabletSchemaCache::create_global_schema_cache(config::tablet_schema_cache_capacity);

_tablet_column_cache =
TabletColumnCache::create_global_column_cache(config::tablet_schema_cache_capacity);
_tablet_column_object_pool = TabletColumnObjectPool::create_global_column_cache(
config::tablet_schema_cache_capacity);

// Storage engine
doris::EngineOptions options;
Expand Down
8 changes: 4 additions & 4 deletions be/src/runtime/memory/cache_policy.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class CachePolicy {
NONE = 18, // not be used
FOR_UT_CACHE_NUMBER = 19,
QUERY_CACHE = 20,
TABLET_COLUMN_CACHE = 21,
TABLET_COLUMN_OBJECT_POOL = 21,
};

static std::string type_string(CacheType type) {
Expand Down Expand Up @@ -94,8 +94,8 @@ class CachePolicy {
return "ForUTCacheNumber";
case CacheType::QUERY_CACHE:
return "QueryCache";
case CacheType::TABLET_COLUMN_CACHE:
return "TabletColumnCache";
case CacheType::TABLET_COLUMN_OBJECT_POOL:
return "TabletColumnObjectPool";
default:
LOG(FATAL) << "not match type of cache policy :" << static_cast<int>(type);
}
Expand Down Expand Up @@ -123,7 +123,7 @@ class CachePolicy {
{"CloudTabletCache", CacheType::CLOUD_TABLET_CACHE},
{"CloudTxnDeleteBitmapCache", CacheType::CLOUD_TXN_DELETE_BITMAP_CACHE},
{"ForUTCacheNumber", CacheType::FOR_UT_CACHE_NUMBER},
{"TabletColumnCache", CacheType::TABLET_COLUMN_CACHE}};
{"TabletColumnObjectPool", CacheType::TABLET_COLUMN_OBJECT_POOL}};

static CacheType string_to_type(std::string type) {
if (StringToType.contains(type)) {
Expand Down
6 changes: 3 additions & 3 deletions be/test/testutil/run_all_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
#include "olap/page_cache.h"
#include "olap/segment_loader.h"
#include "olap/storage_engine.h"
#include "olap/tablet_column_cache.h"
#include "olap/tablet_column_object_pool.h"
#include "olap/tablet_schema_cache.h"
#include "runtime/exec_env.h"
#include "runtime/memory/cache_manager.h"
Expand Down Expand Up @@ -67,8 +67,8 @@ int main(int argc, char** argv) {
doris::ExecEnv::GetInstance()->set_tablet_schema_cache(
doris::TabletSchemaCache::create_global_schema_cache(
doris::config::tablet_schema_cache_capacity));
doris::ExecEnv::GetInstance()->set_tablet_column_cache(
doris::TabletColumnCache::create_global_column_cache(
doris::ExecEnv::GetInstance()->set_tablet_column_object_pool(
doris::TabletColumnObjectPool::create_global_column_cache(
doris::config::tablet_schema_cache_capacity));
LOG(INFO) << "init config " << st;
doris::Status s = doris::config::set_config("enable_stacktrace", "false");
Expand Down

0 comments on commit 0761331

Please sign in to comment.