FATAL: partition_key not in partition hash_split #6890
I was testing replaying SPLIT_OP during bootstrap with the following scenario:
After that, I tried to pause and then resume tservers 1 and 2, trying to make tserver 3 become the leader for most of the tablets. First, I paused tserver 1, so it lost leadership. Then I resumed tserver 1 and paused tserver 2, so it lost leadership. Then I resumed tserver 2. After that I did some random stop/resume (and possibly kill) of yb-tserver, and finally got the crash.
The node that crashed was tserver 3 (but other nodes might have crashed too; I don't have the full logs now).
Easier reproducer:
Summary:

Problem description:
- To resolve a partition_key to a tablet_id, `MetaCache` was using `YBTable::FindPartitionStart` and then translating `partition_start_key` to `tablet_id` based on `MetaCache::TableData::tablets_by_partition`. Because the key of the `tablets_by_partition` map is the first (lowest) key of the partition, the same partition start key can be mapped to different tablet ids before and after splitting.
- A `YBTable::partitions` update also invalidates `MetaCache` for this table. This would be sufficient if we only had a single `YBTable` instance per table, because in that case `YBTable::partitions` would never be older than the partitions version used to fill `MetaCache::TableData::tablets_by_partition`.
- But it turned out we can have more than one `YBTable` instance for the same table and use them concurrently to send requests to the tserver. This can cause the following scenario:
  1) `yb_table_1` and `yb_table_2` are initialized and get the initial table partitions containing tablet `T`, which serves keys `10..20`.
  2) Some data is written into the table; `MetaCache::TableData::tablets_by_partition` maps `partition_start_key = 10` to tablet `T`.
  3) Tablet `T` is split into `T1` (keys `10..14`) and `T2` (keys `15..20`).
  4) `yb_table_1` is used to send a request for key `15`, which now belongs to `T2`. First, the request goes to tablet `T` due to the outdated `yb_table_1::partitions` and `MetaCache::TableData::tablets_by_partition`. Tablet `T` returns that it has been split or deleted (if it was deleted, `TabletInvoker` tries to get the new tablet `T` locations from the master using `YBClient::LookupTabletById` and gets the new table_partitions_version). In both cases, this leads to refreshing the `yb_table_1` table partitions and invalidating `MetaCache`, and the request finally reaches tablet `T2`.
  5) `yb_table_2` is used to send a request for key `15`. `yb_table_2::partitions` is still old, so `yb_table_2::FindPartitionStart` returns `10`, which is translated by the updated `MetaCache::TableData::tablets_by_partition` into `T1`, and `Batcher` tries to route the request to `T1` instead of `T2`.

Solution (see the sketch after this list):
- Add `TableData::partitions`, which is a versioned partitions list.
- Maintain `TableData::tablets_by_partition` and `TableData::tablet_lookups_by_group` so that they correspond to the version of `TableData::partitions`.
- Use `TableData::partitions` instead of `YBTable::partitions` for getting the `partition_start_key` of the key we need to route to the correct tablet.
- Make sure the same table partitions version is used during a single cycle of resolving a key to a tablet; if this condition is violated, return an error that triggers invalidation and update of the table partitions and `MetaCache::TableData`.
- Add `RemoteTablet::last_known_partition_version`, updated on each GetTable(t)Locations response from the master; it is the latest table partitions version for which we know the tablet was serving data for the table.
- If in step 4) of the problem description we get a "tablet not found" error from the master, we compare `RemoteTablet::last_known_partition_version` with the table partitions_version from the master response and trigger a table partitions refresh on the `YBClient` side (which also invalidates `MetaCache` for this table).
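Below is a minimal, self-contained C++ sketch of the versioned-partitions idea from the solution above. The type and function names (`VersionedTablePartitionList`, `TableDataSketch`, `LookupTabletByKey`) are illustrative placeholders, not the actual `yb::client` code: a lookup yields a tablet only if the partitions version it started with still matches the version `tablets_by_partition` was built for; otherwise the caller should refresh partitions and retry.

```cpp
// Sketch only: hypothetical types illustrating versioned partition lookup.
#include <algorithm>
#include <cstdint>
#include <map>
#include <memory>
#include <optional>
#include <string>
#include <vector>

struct VersionedTablePartitionList {
  std::vector<std::string> keys;  // Sorted partition start keys.
  int64_t version = 0;            // Table partitions version from the master.
};

struct TableDataSketch {
  std::shared_ptr<VersionedTablePartitionList> partitions;
  // Keyed by partition start key; valid only for partitions->version.
  std::map<std::string, std::string> tablets_by_partition;
};

// Returns the tablet id, or std::nullopt if the cached state is stale and the
// caller should refresh the table partitions and retry the lookup.
std::optional<std::string> LookupTabletByKey(
    const TableDataSketch& table_data, const std::string& partition_key,
    int64_t expected_partitions_version) {
  if (!table_data.partitions ||
      table_data.partitions->version != expected_partitions_version) {
    return std::nullopt;  // Version changed mid-lookup: force a refresh.
  }
  // FindPartitionStart equivalent: greatest start key <= partition_key.
  const auto& keys = table_data.partitions->keys;
  auto it = std::upper_bound(keys.begin(), keys.end(), partition_key);
  if (it == keys.begin()) {
    return std::nullopt;
  }
  const std::string& partition_start = *std::prev(it);
  auto tablet_it = table_data.tablets_by_partition.find(partition_start);
  if (tablet_it == table_data.tablets_by_partition.end()) {
    return std::nullopt;
  }
  return tablet_it->second;
}
```

Because the partitions list and the tablet map are versioned together, the stale `yb_table_2` in step 5 can no longer resolve key `15` against a newer map: the version mismatch surfaces as a retryable error instead of a misrouted request.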
Problem #2:
- It can happen that between the time we send a `LookupByKeyRpc` for a specific `partition_group_start` key and the time we receive the response, `MetaCache` for this table is updated with a new partitions list and therefore the partition groups change, so we can't use the information from the response to this RPC to provide data to lookups interested in the partition group starting with the `partition_group_start` key.
- To resolve this, `LookupByKeyRpc::partition_group_start_` was defined as a `VersionedPartitionGroupStartKey` type that includes the partition list version, and checks were implemented so that all 3 versions match (from `LookupByKeyRpc`, from `MetaCache::TableData`, and from the response to the RPC); see the sketch below.

Test Plan:
```
./bin/yb-ctl --rf=1 create --num_shards_per_tserver=1 --ysql_num_shards_per_tserver=1 --master_flags '"tablet_split_size_threshold_bytes=300000","enable_tracing=true"' --tserver_flags '"db_write_buffer_size=100000"'
java -jar ~/code/yb-sample-apps/target/yb-sample-apps.jar --workload CassandraSecondaryIndex --nodes 127.0.0.1:9042 --num_threads_read 2 --num_threads_write 2 --num_unique_keys 10000000 --nouuid
```

Reviewers: sergei, bogdan, rsami, mbautin

Reviewed By: mbautin

Subscribers: mbautin, ybase

Differential Revision: https://phabricator.dev.yugabyte.com/D10424
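A minimal sketch of the three-way version check described in Problem #2 above. The field and function names are assumptions for illustration (only the `VersionedPartitionGroupStartKey` type name comes from the commit message): the version captured when the RPC was sent, the current `MetaCache::TableData` version, and the version reported in the RPC response must all agree before the response is used to complete lookups for that partition group.

```cpp
// Sketch only: hypothetical version-consistency check for LookupByKeyRpc.
#include <cstdint>
#include <string>

struct VersionedPartitionGroupStartKey {
  std::string partition_group_start;   // First key of the partition group.
  int64_t partition_list_version = 0;  // Version the group was computed from.
};

// Returns true only if the RPC, the cached TableData, and the response all
// refer to the same partition list version; otherwise the response is stale
// and the lookup must be retried against refreshed partitions.
bool CanApplyLookupByKeyResponse(
    const VersionedPartitionGroupStartKey& rpc_key,
    int64_t table_data_partition_list_version,
    int64_t response_partition_list_version) {
  return rpc_key.partition_list_version == table_data_partition_list_version &&
         rpc_key.partition_list_version == response_partition_list_version;
}
```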
…itions list for co-located tables

Summary: As a result of the fix for #6890, a potential perf issue was introduced for the first lookup of a tablet by key for colocated tables. Instead of sending 1 RPC on the first lookup for a colocated table and then reusing the result for all tables co-located with it, MetaCache sends 1 more RPC each time another table co-located with the first one is queried to resolve a tablet by key. Since all colocated tables share the same tablet, we can cache the locations on the first RPC to any co-located table and then reuse the result for `MetaCache::LookupTabletByKey` calls for any other table co-located with the one already queried.

Suppose we have colocated tables `Table1` and `Table2` sharing `Tablet0`; then the behavior without the fix is the following:
1. Someone calls `MetaCache::LookupTabletByKey` for `Table1` and `partition_key=p`
2. `MetaCache` checks that it doesn't have `TableData` for `Table1`, initializes `TableData` for `Table1` with the list of partitions for `Table1`, and sends an RPC to the master
3. The master returns tablet locations that contain tablet locations for both `Table1` and `Table2`, because they are colocated and share the same tablet set
4. `MetaCache` updates `TableData::tablets_by_partition` for `Table1`
5. The caller gets `Tablet0` as a response to `MetaCache::LookupTabletByKey`
6. Someone calls `MetaCache::LookupTabletByKey` for `Table2` and `partition_key=p`
7. `MetaCache` checks that it doesn't have `TableData` for `Table2` and sends an RPC to the master

With the fix, at step 4 `MetaCache` also initializes `TableData` for `Table2` using the same partitions list that was used for `Table1` and updates `TableData::tablets_by_partition` for both tables. So at step 7, `MetaCache` already has `TableData` for `Table2` and responds with the tablet without an RPC to the master (see the sketch below).

- Fixed `MetaCache::ProcessTabletLocations` to reuse the partitions list for co-located tables
- Added ClientTest.ColocatedTablesLookupTablet
- Moved the most frequent VLOGs from level 4 to level 5 for `MetaCache`

Test Plan: For ASAN/TSAN/release/debug:
```
ybd --gtest_filter ClientTest.ColocatedTablesLookupTablet -n 100 -- -p 1
```

Reviewers: mbautin, bogdan

Reviewed By: mbautin, bogdan

Subscribers: ybase

Differential Revision: https://phabricator.dev.yugabyte.com/D10755
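A minimal sketch of the reuse described above, using hypothetical structures rather than the actual `MetaCache::ProcessTabletLocations` code: since a tablet locations response names every co-located table hosted by the tablet, one response can populate a cache entry for each of those tables, so later lookups for the other tables need no extra RPC.

```cpp
// Sketch only: hypothetical types illustrating reuse of one locations
// response for all co-located tables sharing a tablet.
#include <map>
#include <memory>
#include <string>
#include <vector>

struct TabletLocationSketch {
  std::string tablet_id;
  std::string partition_start;
  std::vector<std::string> hosted_table_ids;  // All co-located tables on this tablet.
};

struct ColocatedTableDataSketch {
  std::shared_ptr<std::vector<std::string>> partitions;  // Shared partitions list.
  std::map<std::string, std::string> tablets_by_partition;
};

void ProcessTabletLocationsSketch(
    const std::vector<TabletLocationSketch>& locations,
    const std::shared_ptr<std::vector<std::string>>& queried_table_partitions,
    std::map<std::string, ColocatedTableDataSketch>* tables_by_id) {
  for (const auto& location : locations) {
    for (const auto& table_id : location.hosted_table_ids) {
      // Create TableData for every co-located table named in the response,
      // reusing the partitions list of the table that was actually queried,
      // so later LookupTabletByKey calls for those tables hit the cache.
      auto& table_data = (*tables_by_id)[table_id];
      if (!table_data.partitions) {
        table_data.partitions = queried_table_partitions;
      }
      table_data.tablets_by_partition[location.partition_start] = location.tablet_id;
    }
  }
}
```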
yb-tserver crashed with the following FATAL log: