Skip to content

Commit

Permalink
[#7413]: docdb: Fixed MetaCache::ProcessTabletLocations to reuse part…
Browse files Browse the repository at this point in the history
…itions list for co-located tables

Summary:
As a result of the fix for #6890 there was introduced a potential perf issue for the first lookup of tablet by key for colocated tables. Instead of sending 1 RPC when doing first lookup for colocated table and then reusing the result for all tables co-located with the first one, MetaCache is sending 1 more RPC each time another table co-located with the first one is queried to resolve tablet by key.
Since all colocated tables share the same tablet, we can cache the locations on the first RPC to any co-located table and then reuse the result for any MetaCache::LookupTabletByKey calls for any other table co-located with the one already queried.

Suppose we have colocated tables `Table1` and `Table2` sharing `Tablet0`, then behavior without the fix is the following:
1. Someone calls `MetaCache::LookupTabletByKey` for `Table1` and `partition_key=p`
2. `MetaCache` checks that it doesn’t have `TableData` for `Table1`, initializes `TableData` for `Table1` with the list of partitions for `Table1`, and sends an RPC to the master
3. Master returns tablet locations that contain tablet locations for both `Table1` and `Table2`, because they are colocated and share the same tablets set
4. `MetaCache` updates `TableData::tablets_by_partition` for `Table1`
5. Caller gets `Tablet0` as a response to `MetaCache::LookupTabletByKey`
6. Someone calls `MetaCache::LookupTabletByKey` for `Table2` and `partition_key=p`
7. `MetaCache` checks that it doesn’t have `TableData` for `Table2` and sends an RPC to the master

And with the fix, at step 4 `MetaCache` will also initialize `TableData` for `Table2` using the same partitions list which was used for `Table1` and will update `TableData::tablets_by_partition` for both tables. So, at step 7, `MetaCache` will have `TableData` for `Table2` and will respond with the tablet without RPC to the master.

- Fixed `MetaCache::ProcessTabletLocations` to reuse partitions list for co-located tables
- Added ClientTest.ColocatedTablesLookupTablet
- Moved most frequent VLOGS from level 4 to level 5 for `MetaCache`

Test Plan:
For ASAN/TSAN/release/debug:
```
ybd --gtest_filter ClientTest.ColocatedTablesLookupTablet -n 100 -- -p 1
```

Reviewers: mbautin, bogdan

Reviewed By: mbautin, bogdan

Subscribers: ybase

Differential Revision: https://phabricator.dev.yugabyte.com/D10755
  • Loading branch information
ttyusupov committed Mar 5, 2021
1 parent e6cb5c7 commit 3a69097
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 19 deletions.
77 changes: 71 additions & 6 deletions src/yb/client/client-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,17 @@ using std::shared_ptr;
using tablet::TabletPeer;
using tserver::MiniTabletServer;

namespace {

constexpr int32_t kNoBound = kint32max;
constexpr int kNumTablets = 2;

const std::string kKeyspaceName = "my_keyspace";
const std::string kPgsqlKeyspaceID = "1234";
const std::string kPgsqlKeyspaceName = "psql" + kKeyspaceName;

} // namespace

class ClientTest: public YBMiniClusterTestBase<MiniCluster> {
public:
ClientTest() {
Expand Down Expand Up @@ -195,7 +203,6 @@ class ClientTest: public YBMiniClusterTestBase<MiniCluster> {
}

protected:
static const string kKeyspaceName;
static const YBTableName kTableName;
static const YBTableName kTable2Name;
static const YBTableName kTable3Name;
Expand Down Expand Up @@ -468,7 +475,6 @@ class ClientTest: public YBMiniClusterTestBase<MiniCluster> {
};


const string ClientTest::kKeyspaceName("my_keyspace");
const YBTableName ClientTest::kTableName(YQL_DATABASE_CQL, kKeyspaceName, "client-testtb");
const YBTableName ClientTest::kTable2Name(YQL_DATABASE_CQL, kKeyspaceName, "client-testtb2");
const YBTableName ClientTest::kTable3Name(YQL_DATABASE_CQL, kKeyspaceName, "client-testtb3");
Expand Down Expand Up @@ -2257,8 +2263,6 @@ TEST_F(ClientTest, Capability) {

TEST_F(ClientTest, TestCreateTableWithRangePartition) {
std::unique_ptr<YBTableCreator> table_creator(client_->NewTableCreator());
const std::string kPgsqlKeyspaceID = "1234";
const std::string kPgsqlKeyspaceName = "psql" + kKeyspaceName;
const std::string kPgsqlTableName = "pgsqlrangepartitionedtable";
const std::string kPgsqlTableId = "pgsqlrangepartitionedtableid";
const size_t kColIdx = 1;
Expand Down Expand Up @@ -2390,8 +2394,6 @@ TEST_F(ClientTest, FlushTable) {
#endif // !defined(__clang__)

TEST_F(ClientTest, GetNamespaceInfo) {
const std::string kPgsqlKeyspaceID = "1234";
const std::string kPgsqlKeyspaceName = "psql" + kKeyspaceName;
GetNamespaceInfoResponsePB resp;

// Setup.
Expand Down Expand Up @@ -2515,5 +2517,68 @@ TEST_F(ClientTest, RefreshPartitions) {
LOG(INFO) << "num_lookups_done: " << num_lookups_done;
}

// There should be only one lookup RPC asking for colocated tables tablet locations.
// When we ask for tablet lookup for other tables colocated with the first one we asked, MetaCache
// should be able to respond without sending RPCs to master again.
TEST_F(ClientTest, ColocatedTablesLookupTablet) {
const auto kTabletLookupTimeout = 10s;
const auto kNumTables = 10;

YBTableName common_table_name(
YQLDatabase::YQL_DATABASE_PGSQL, kPgsqlKeyspaceID, kPgsqlKeyspaceName, "table_name");
ASSERT_OK(client_->CreateNamespace(
common_table_name.namespace_name(),
common_table_name.namespace_type(),
/* creator_role_name =*/ "",
common_table_name.namespace_id(),
/* source_namespace_id =*/ "",
/* next_pg_oid =*/ boost::none,
/* txn =*/ boost::none,
/* colocated =*/ true));

YBSchemaBuilder schemaBuilder;
schemaBuilder.AddColumn("key")->PrimaryKey()->Type(yb::INT64);
schemaBuilder.AddColumn("value")->Type(yb::INT64);
YBSchema schema;
ASSERT_OK(schemaBuilder.Build(&schema));

std::unique_ptr<YBTableCreator> table_creator(client_->NewTableCreator());
std::vector<YBTableName> table_names;
for (auto i = 0; i < kNumTables; ++i) {
const auto name = Format("table_$0", i);
auto table_name = common_table_name;
// Autogenerated ids will fail the IsPgsqlId() CHECKs so we need to generate oids.
table_name.set_table_id(GetPgsqlTableId(i, i));
table_name.set_table_name(name);
ASSERT_OK(table_creator->table_name(table_name)
.table_id(table_name.table_id())
.schema(&schema)
.set_range_partition_columns({"key"})
.table_type(PGSQL_TABLE_TYPE)
.num_tablets(1)
.Create());
table_names.push_back(table_name);
}

const auto lookup_serial_start = client::internal::TEST_GetLookupSerial();

TabletId colocated_tablet_id;
for (const auto& table_name : table_names) {
auto table = ASSERT_RESULT(client_->OpenTable(table_name));
const auto tablet_result = client_->LookupTabletByKeyFuture(
table, /* partition_key =*/ "",
CoarseMonoClock::now() + kTabletLookupTimeout).get();
const auto tablet_id = ASSERT_RESULT(tablet_result)->tablet_id();
if (colocated_tablet_id.empty()) {
colocated_tablet_id = tablet_id;
} else {
ASSERT_EQ(tablet_id, colocated_tablet_id);
}
}

const auto lookup_serial_stop = client::internal::TEST_GetLookupSerial();
ASSERT_EQ(lookup_serial_stop, lookup_serial_start + 1);
}

} // namespace client
} // namespace yb
64 changes: 51 additions & 13 deletions src/yb/client/meta_cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,10 @@ std::atomic<int64_t> lookup_serial_{1};

} // namespace

int64_t TEST_GetLookupSerial() {
return lookup_serial_.load(std::memory_order_acquire);
}

////////////////////////////////////////////////////////////

RemoteTabletServer::RemoteTabletServer(const master::TSInfoPB& pb)
Expand Down Expand Up @@ -912,6 +916,7 @@ Status MetaCache::ProcessTabletLocations(
const google::protobuf::RepeatedPtrField<master::TabletLocationsPB>& locations,
boost::optional<PartitionListVersion> table_partition_list_version, LookupRpc* lookup_rpc) {
if (VLOG_IS_ON(2)) {
VLOG_WITH_FUNC(2) << "lookup_rpc: " << AsString(lookup_rpc);
for (const auto& loc : locations) {
for (const auto& table_id : loc.table_ids()) {
VLOG_WITH_FUNC(2) << loc.tablet_id() << ", " << table_id;
Expand All @@ -937,11 +942,39 @@ Status MetaCache::ProcessTabletLocations(
UpdateTabletServerUnlocked(r.ts_info());
}

VersionedTablePartitionListPtr colocated_table_partition_list;
if (loc.table_ids_size() > 1 && lookup_rpc && lookup_rpc->table()) {
// When table_ids_size() == 1 we only receive info for the single table from the master
// and we already have TableData initialized for it (this is done before sending an RPC to
// the master). And when table_ids_size() > 1, it means we got response for lookup RPC for
// co-located table and we can re-use TableData::partition_list from the table that was
// requested by MetaCache::LookupTabletByKey caller for other tables co-located with this
// one (since all co-located tables sharing the same set of tablets have the same table
// partition list and now we have list of them returned by the master).
const auto lookup_table_it = tables_.find(lookup_rpc->table()->id());
if (lookup_table_it != tables_.end()) {
colocated_table_partition_list = lookup_table_it->second.partition_list;
} else {
// We don't want to crash the server in that case for production, since this is not a
// correctness issue, but gives some performance degradation on first lookups for
// co-located tables.
// But we do want it to crash in debug, so we can more reliably catch this if it happens.
LOG(DFATAL) << Format(
"Internal error: got response for lookup RPC for co-located table, but MetaCache "
"table data wasn't initialized with partition list for this table. RPC: $0",
AsString(lookup_rpc));
}
}

for (const std::string& table_id : loc.table_ids()) {
auto& processed_table = processed_tables[table_id];
std::map<PartitionKey, RemoteTabletPtr>* tablets_by_key = nullptr;

const auto table_it = tables_.find(table_id);
auto table_it = tables_.find(table_id);
if (table_it == tables_.end() && loc.table_ids_size() > 1 &&
colocated_table_partition_list) {
table_it = InitTableDataUnlocked(table_id, colocated_table_partition_list);
}
if (table_it != tables_.end()) {
auto& table_data = table_it->second;

Expand Down Expand Up @@ -1083,6 +1116,9 @@ void MetaCache::MaybeUpdateClientRequests(const RemoteTablet& tablet) {

std::unordered_map<TableId, TableData>::iterator MetaCache::InitTableDataUnlocked(
const TableId& table_id, const VersionedTablePartitionListPtr& partitions) {
VLOG_WITH_FUNC(4) << Format(
"MetaCache($0) initializing TableData ($1 tables) for table $2: $3",
static_cast<void*>(this), tables_.size(), table_id, tables_.count(table_id));
return tables_.emplace(
std::piecewise_construct, std::forward_as_tuple(table_id),
std::forward_as_tuple(partitions)).first;
Expand Down Expand Up @@ -1747,7 +1783,7 @@ RemoteTabletPtr MetaCache::FastLookupTabletByKeyUnlocked(
// Fast path: lookup in the cache.
auto result = LookupTabletByKeyFastPathUnlocked(table_id, partition_start);
if (result && result->HasLeader()) {
VLOG(4) << "Fast lookup: found tablet " << result->tablet_id();
VLOG(5) << "Fast lookup: found tablet " << result->tablet_id();
return result;
}

Expand Down Expand Up @@ -1790,6 +1826,8 @@ bool MetaCache::DoLookupTabletByKey(
auto table_it = tables_.find(table->id());
TableData* table_data;
if (table_it == tables_.end()) {
VLOG_WITH_FUNC(4) << Format(
"MetaCache($0) missed table_id $1", static_cast<void*>(this), table->id());
if (!IsUniqueLock(&lock)) {
return false;
}
Expand Down Expand Up @@ -1832,20 +1870,20 @@ bool MetaCache::DoLookupTabletByKey(
int64_t expected = 0;
if (!lookups_group->running_request_number.compare_exchange_strong(
expected, request_no, std::memory_order_acq_rel)) {
VLOG_WITH_FUNC(4)
VLOG_WITH_FUNC(5)
<< "Lookup is already running for table: " << table->ToString()
<< ", partition_group_start: " << Slice(**partition_group_start).ToDebugHexString();
return true;
}
}

VLOG_WITH_FUNC(4)
<< "Start lookup for table: " << table->ToString()
<< ", partition_group_start: " << Slice(**partition_group_start).ToDebugHexString();

auto rpc = std::make_shared<LookupByKeyRpc>(
this, table, VersionedPartitionGroupStartKey{*partition_group_start, partitions->version},
request_no, deadline);
VLOG_WITH_FUNC(4)
<< "Started lookup for table: " << table->ToString()
<< ", partition_group_start: " << Slice(**partition_group_start).ToDebugHexString()
<< ", rpc: " << AsString(rpc);
rpcs_.RegisterAndStart(rpc, rpc->RpcHandle());
return true;
}
Expand Down Expand Up @@ -1882,7 +1920,7 @@ bool MetaCache::DoLookupAllTablets(const std::shared_ptr<const YBTable>& table,
int64_t expected = 0;
if (!full_table_lookups.running_request_number.compare_exchange_strong(
expected, request_no, std::memory_order_acq_rel)) {
VLOG_WITH_FUNC(4)
VLOG_WITH_FUNC(5)
<< "Lookup is already running for table: " << table->ToString();
return true;
}
Expand All @@ -1903,7 +1941,7 @@ void MetaCache::LookupTabletByKey(const std::shared_ptr<const YBTable>& table,
LookupTabletCallback callback) {
const auto table_partition_list = table->GetVersionedPartitions();
const auto partition_start = client::FindPartitionStart(table_partition_list, partition_key);
VLOG_WITH_FUNC(4) << "Table: " << table->ToString()
VLOG_WITH_FUNC(5) << "Table: " << table->ToString()
<< ", partition_list_version: " << table_partition_list->version
<< ", partition_key: " << Slice(partition_key).ToDebugHexString()
<< ", partition_start: " << Slice(*partition_start).ToDebugHexString();
Expand Down Expand Up @@ -1963,11 +2001,11 @@ bool MetaCache::DoLookupTabletById(
// Fast path: lookup in the cache.
tablet = LookupTabletByIdFastPathUnlocked(tablet_id);
if (tablet) {
VLOG(4) << "Fast lookup: candidate tablet " << AsString(tablet);
VLOG(5) << "Fast lookup: candidate tablet " << AsString(tablet);
if (use_cache && tablet->HasLeader()) {
// tablet->HasLeader() check makes MetaCache send RPC to master in case of no tablet with
// tablet_id is found on all replicas.
VLOG(4) << "Fast lookup: found tablet " << tablet->tablet_id();
VLOG(5) << "Fast lookup: found tablet " << tablet->tablet_id();
return true;
}
lookups_without_new_replicas = tablet->lookups_without_new_replicas();
Expand All @@ -1991,7 +2029,7 @@ bool MetaCache::DoLookupTabletById(
int64_t expected = 0;
if (!lookup->running_request_number.compare_exchange_strong(
expected, request_no, std::memory_order_acq_rel)) {
VLOG_WITH_FUNC(4) << "Lookup already running for tablet: " << tablet_id;
VLOG_WITH_FUNC(5) << "Lookup already running for tablet: " << tablet_id;
return true;
}
}
Expand All @@ -2009,7 +2047,7 @@ void MetaCache::LookupTabletById(const TabletId& tablet_id,
CoarseTimePoint deadline,
LookupTabletCallback callback,
UseCache use_cache) {
VLOG_WITH_FUNC(4) << "(" << tablet_id << ", " << use_cache << ")";
VLOG_WITH_FUNC(5) << "(" << tablet_id << ", " << use_cache << ")";

if (DoLookupTabletById<SharedLock<decltype(mutex_)>>(
tablet_id, table, deadline, use_cache, &callback)) {
Expand Down
2 changes: 2 additions & 0 deletions src/yb/client/meta_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -721,6 +721,8 @@ class MetaCache : public RefCountedThreadSafe<MetaCache> {
DISALLOW_COPY_AND_ASSIGN(MetaCache);
};

int64_t TEST_GetLookupSerial();

} // namespace internal
} // namespace client
} // namespace yb
Expand Down

0 comments on commit 3a69097

Please sign in to comment.