From 5b759f41d05f3de6cfe2d8d28879995e20571d69 Mon Sep 17 00:00:00 2001
From: duyuqi
Date: Thu, 7 Jul 2022 14:24:52 +0000
Subject: [PATCH] feat: support new comparator because of design

WIP.

Add a Pegasus comparator that sorts keys the way a normal user expects.
Using the new Pegasus comparator will solve many scan timeout problems.
Pegasus stays compatible with apps (tables) that use the old comparator.
The Pegasus comparator will be the default comparator for new tables.

More details at: https://github.com/apache/incubator-pegasus/issues/729
---
 rdsn/src/dsn.layer2.thrift                   |  13 +++
 rdsn/src/meta/server_state.cpp               |   1 +
 src/server/pegasus_comparator.h              | 107 +++++++++++++++++++
 src/server/pegasus_server_impl_init.cpp      |   8 ++
 src/server/test/hashkey_transform_test.cpp   |  18 ++++
 src/server/test/pegasus_server_impl_test.cpp |   8 ++
 src/test/function_test/test_basic.cpp        |  11 ++
 7 files changed, 166 insertions(+)
 create mode 100644 src/server/pegasus_comparator.h

diff --git a/rdsn/src/dsn.layer2.thrift b/rdsn/src/dsn.layer2.thrift
index 20e76c64a3..b45afad893 100644
--- a/rdsn/src/dsn.layer2.thrift
+++ b/rdsn/src/dsn.layer2.thrift
@@ -70,6 +70,12 @@ enum app_status
     AS_RECALLING
 }
 
+enum comparator_type
+{
+    DEFAULT,
+    PEGASUS
+}
+
 struct app_info
 {
     1:app_status status = app_status.AS_INVALID;
@@ -100,4 +106,11 @@ struct app_info
     // New fields for bulk load
     // Whether this app is executing bulk load
     14:optional bool is_bulk_loading = false;
+
+    // New field for the rocksdb comparator. It must not change after the app (table) is created.
+    // "comparator_type.DEFAULT": rocksdb's BytewiseComparator.
+    // "comparator_type.PEGASUS": a new comparator defined by Pegasus.
+    // New tables use "comparator_type.PEGASUS";
+    // if no comparator is set, "comparator_type.DEFAULT" is used for compatibility.
+    15:optional comparator_type comparator = comparator_type.DEFAULT;
 }
diff --git a/rdsn/src/meta/server_state.cpp b/rdsn/src/meta/server_state.cpp
index 7be63bfd1f..c9cc7bf110 100644
--- a/rdsn/src/meta/server_state.cpp
+++ b/rdsn/src/meta/server_state.cpp
@@ -1161,6 +1161,7 @@ void server_state::create_app(dsn::message_ex *msg)
         info.status = app_status::AS_CREATING;
         info.create_second = dsn_now_ms() / 1000;
         info.init_partition_count = request.options.partition_count;
+        info.comparator = dsn::comparator_type::PEGASUS;
         app = app_state::create(info);
         app->helpers->pending_response = msg;
 
diff --git a/src/server/pegasus_comparator.h b/src/server/pegasus_comparator.h
new file mode 100644
index 0000000000..2705c6139d
--- /dev/null
+++ b/src/server/pegasus_comparator.h
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <cstring>
+#include <string>
+
+#include <rocksdb/comparator.h>
+#include <rocksdb/slice.h>
+
+namespace rocksdb {
+
+// Reports whether the host stores the least significant byte of an integer first.
+inline bool IsLittleEndian()
+{
+    // Read the first byte as `unsigned char`: where plain `char` is signed, a byte such as
+    // 0x88 promotes to a negative int and never equals the literal 0x88, so a probe written
+    // that way reports big-endian even on little-endian hosts.
+    const uint16_t probe = 1;
+    unsigned char first_byte = 0;
+    memcpy(&first_byte, &probe, sizeof(first_byte));
+    return first_byte == 1;
+}
+
+// The function is copied from rocksdb/util/coding.h
+
+// Lower-level versions of Get... that read directly from a character buffer
+// without any bounds checking.
+// Decodes a little-endian uint16_t; `ptr` must point at two or more readable bytes.
+inline uint16_t DecodeFixed16(const char *ptr)
+{
+    // port::kLittleEndian
+    if (IsLittleEndian()) {
+        // Load the raw bytes
+        uint16_t result;
+        memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load
+        return result;
+    } else {
+        return ((static_cast<uint16_t>(static_cast<unsigned char>(ptr[0]))) |
+                (static_cast<uint16_t>(static_cast<unsigned char>(ptr[1])) << 8));
+    }
+}
+
+// The function is copied from rocksdb/util/coding.h
+// Pops a little-endian uint16_t off the front of `input` and stores it in `value`.
+// Returns false, leaving both arguments untouched, when fewer than two bytes remain.
+inline bool GetFixed16(Slice *input, uint16_t *value)
+{
+    if (input->size() < sizeof(uint16_t)) {
+        return false;
+    }
+    *value = DecodeFixed16(input->data());
+    input->remove_prefix(sizeof(uint16_t));
+    return true;
+}
+
+} // namespace rocksdb
+
+namespace pegasus {
+namespace server {
+
+// Orders keys by hash key first and by the bytes following the hash key second, so that the
+// order of two keys does not depend on the length of their hash keys.
+//
+// Keys are expected in the layout written by pegasus_generate_key():
+//   [hash_key_len: uint16, big-endian][hash_key][sort_key]
+// NOTE(review): the big-endian prefix comes from base/pegasus_key_schema.h, which this
+// header cannot check at compile time; keep the two in sync.
+class PegasusComparator : public rocksdb::Comparator
+{
+public:
+    PegasusComparator() {}
+
+    // RocksDB uses this name to detect comparator mismatches; keep it stable.
+    const char *Name() const override { return "PegasusComparator"; }
+
+    // Returns a negative, zero or positive value when `left` sorts before, equal to or after
+    // `right`. Never reads outside either slice, whatever bytes the keys contain.
+    int Compare(const rocksdb::Slice &left, const rocksdb::Slice &right) const override
+    {
+        rocksdb::Slice left_hash_key, left_rest;
+        rocksdb::Slice right_hash_key, right_rest;
+        SplitKey(left, &left_hash_key, &left_rest);
+        SplitKey(right, &right_hash_key, &right_rest);
+
+        int ret = left_hash_key.compare(right_hash_key);
+        if (ret != 0) {
+            return ret;
+        }
+        ret = left_rest.compare(right_rest);
+        if (ret != 0) {
+            return ret;
+        }
+        // Only keys whose length prefix disagrees with their real size get here with
+        // different bytes; compare the raw bytes so that distinct keys never tie.
+        return left.compare(right);
+    }
+
+    // Leaves `start` unchanged, which is always a valid (if not the shortest) separator.
+    void FindShortestSeparator(std::string *start, const rocksdb::Slice &limit) const override
+    {
+        (void)start;
+        (void)limit;
+        // TODO
+    }
+
+    // Leaves `key` unchanged, which is always a valid (if not the shortest) successor.
+    void FindShortSuccessor(std::string *key) const override
+    {
+        (void)key;
+        // TODO
+    }
+
+private:
+    // Splits `key` into its hash key and the bytes after it. A key too short to hold the
+    // length prefix yields two empty slices, and a prefix larger than the bytes actually
+    // present is clamped, so malformed or truncated keys are never read out of bounds.
+    static void SplitKey(const rocksdb::Slice &key,
+                         rocksdb::Slice *hash_key,
+                         rocksdb::Slice *rest)
+    {
+        const size_t prefix_size = sizeof(uint16_t);
+        if (key.size() < prefix_size) {
+            *hash_key = rocksdb::Slice();
+            *rest = rocksdb::Slice();
+            return;
+        }
+        const unsigned char *raw = reinterpret_cast<const unsigned char *>(key.data());
+        size_t hash_key_len = (static_cast<size_t>(raw[0]) << 8) | raw[1];
+        const size_t available = key.size() - prefix_size;
+        if (hash_key_len > available) {
+            hash_key_len = available;
+        }
+        const char *body = key.data() + prefix_size;
+        *hash_key = rocksdb::Slice(body, hash_key_len);
+        *rest = rocksdb::Slice(body + hash_key_len, available - hash_key_len);
+    }
+};
+
+} // namespace server
+} // namespace pegasus
diff --git a/src/server/pegasus_server_impl_init.cpp b/src/server/pegasus_server_impl_init.cpp
index cf34f04cb3..b2b01ca3e8 100644
--- a/src/server/pegasus_server_impl_init.cpp
+++ 
b/src/server/pegasus_server_impl_init.cpp
@@ -27,6 +27,7 @@
 #include "capacity_unit_calculator.h"
 #include "hashkey_transform.h"
 #include "meta_store.h"
+#include "pegasus_comparator.h"
 #include "pegasus_event_listener.h"
 #include "pegasus_server_write.h"
 #include "hotkey_collector.h"
@@ -327,6 +328,16 @@ pegasus_server_impl::pegasus_server_impl(dsn::replication::replica *r)
                                          60,
                                          "rocksdb options.level0_stop_writes_trigger");
 
+    // user-defined comparator
+    const dsn::app_info *app_info = get_app_info();
+    if (app_info->comparator == dsn::comparator_type::PEGASUS) {
+        // The comparator is stateless, so every replica shares one instance that is
+        // intentionally never destroyed, instead of allocating (and leaking) one per replica.
+        static const pegasus::server::PegasusComparator *const shared_comparator =
+            new pegasus::server::PegasusComparator();
+        _data_cf_opts.comparator = shared_comparator;
+    }
+
     std::string compression_str = dsn_config_get_value_string(
         "pegasus.server",
         "rocksdb_compression_type",
diff --git a/src/server/test/hashkey_transform_test.cpp b/src/server/test/hashkey_transform_test.cpp
index a84c00cc6f..ea14298645 100644
--- a/src/server/test/hashkey_transform_test.cpp
+++ b/src/server/test/hashkey_transform_test.cpp
@@ -23,6 +23,7 @@
 #include <rocksdb/comparator.h>
 
 #include "base/pegasus_key_schema.h"
+#include "server/pegasus_comparator.h"
 
 // User define SliceTransform must obey the 4 rules of ColumnFamilyOptions.prefix_extractor:
 // 1) key.starts_with(prefix(key))
@@ -77,3 +78,20 @@ TEST(HashkeyTransformTest, Basic)
     ASSERT_EQ(prefix_extractor.Transform(prefix_extractor.Transform(skey4)),
               prefix_extractor.Transform(skey4));
 }
+
+TEST(HashkeyTransformTest, PegasusComparator)
+{
+    // A stack instance is released even when an ASSERT_* returns early.
+    const pegasus::server::PegasusComparator comp;
+    const rocksdb::Comparator *bytes_comp = rocksdb::BytewiseComparator();
+
+    dsn::blob bkey1, bkey2;
+    pegasus::pegasus_generate_key(bkey1, std::string("z"), std::string(""));
+    pegasus::pegasus_generate_key(bkey2, std::string("aaa"), std::string(""));
+    rocksdb::Slice skey1(bkey1.data(), bkey1.size());
+    rocksdb::Slice skey2(bkey2.data(), bkey2.size());
+
+    // Bytewise order puts the "z" key first; PegasusComparator must put it after "aaa".
+    ASSERT_TRUE(bytes_comp->Compare(skey1, skey2) < 0);
+    ASSERT_TRUE(comp.Compare(skey1, skey2) > 0);
+}
diff --git a/src/server/test/pegasus_server_impl_test.cpp b/src/server/test/pegasus_server_impl_test.cpp
index 16671e4dc9..5ca8aec96b 100644
--- a/src/server/test/pegasus_server_impl_test.cpp
+++ b/src/server/test/pegasus_server_impl_test.cpp
@@ -106,6 +106,14 @@ TEST_F(pegasus_server_impl_test, test_open_db_with_latest_options)
     ASSERT_EQ(opts.disable_auto_compactions, _server->_db->GetOptions().disable_auto_compactions);
 }
 
+TEST_F(pegasus_server_impl_test, test_open_db_with_comparator)
+{
+    // TODO(shenxingwuying)
+
+    // open a new db
+    start();
+}
+
 TEST_F(pegasus_server_impl_test, test_open_db_with_app_envs)
 {
     std::map<std::string, std::string> envs;
diff --git a/src/test/function_test/test_basic.cpp b/src/test/function_test/test_basic.cpp
index d89edc8962..e7c8fa60d0 100644
--- a/src/test/function_test/test_basic.cpp
+++ b/src/test/function_test/test_basic.cpp
@@ -1852,3 +1852,14 @@ TEST(basic, full_scan_with_filter)
     ASSERT_EQ(PERR_OK, ret);
     ASSERT_EQ(8, deleted_count);
 }
+
+TEST(basic, TestCreateTable)
+{
+    // TODO(shenxingwuying)
+    static const std::string table_with_default_comparator = "table_with_default_comparator";
+    static const std::string table_with_pegasus_comparator = "table_with_pegasus_comparator";
+
+    client = pegasus_client_factory::get_client("mycluster", table_with_default_comparator.c_str());
+
+    client = pegasus_client_factory::get_client("mycluster", table_with_pegasus_comparator.c_str());
+}