Skip to content
This repository has been archived by the owner on Jun 23, 2022. It is now read-only.

feat(split): add check state task for split context #743

Merged
merged 2 commits into from
Mar 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion src/replica/replica_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,14 +244,22 @@ bool potential_secondary_context::is_cleaned()
bool partition_split_context::cleanup(bool force)
{
CLEANUP_TASK(async_learn_task, force)
if (!force) {
CLEANUP_TASK_ALWAYS(check_state_task)
} else {
CLEANUP_TASK(check_state_task, force)
}

parent_gpid.set_app_id(0);
is_prepare_list_copied = false;
is_caught_up = false;
return true;
}

// NOTE(review): this page is a flattened diff; this one-line definition only checks
// async_learn_task and appears to be the pre-change version that the definition on the
// following lines replaces (the hunk header reports 1 deletion) - confirm.
bool partition_split_context::is_cleaned() const { return async_learn_task == nullptr; }
// The split context counts as cleaned only when neither the async learn task nor the
// split state checker task is still held.
bool partition_split_context::is_cleaned() const
{
    const bool learn_task_released = (async_learn_task == nullptr);
    const bool check_task_released = (check_state_task == nullptr);
    return learn_task_released && check_task_released;
}

} // namespace replication
} // namespace dsn
6 changes: 5 additions & 1 deletion src/replica/replica_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,11 @@ class partition_split_context
bool is_caught_up{false};

// child replica async learn parent states
dsn::task_ptr async_learn_task;
task_ptr async_learn_task;

// partition split state checker, started when the child replica is initialized
// see more in functions `child_check_split_context` and `parent_check_states`
task_ptr check_state_task;
};

//---------------inline impl----------------------------------------------------------------
Expand Down
34 changes: 34 additions & 0 deletions src/replica/split/replica_split_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,12 @@ void replica_split_manager::child_init_replica(gpid parent_gpid,
_replica->_split_states.parent_gpid = parent_gpid;
_replica->_split_states.is_prepare_list_copied = false;
_replica->_split_states.is_caught_up = false;
_replica->_split_states.check_state_task =
tasking::enqueue(LPC_PARTITION_SPLIT,
tracker(),
std::bind(&replica_split_manager::child_check_split_context, this),
get_gpid().thread_hash(),
std::chrono::seconds(3));
// TODO(heyuchen): add other states

ddebug_replica(
Expand All @@ -137,6 +143,34 @@ void replica_split_manager::child_init_replica(gpid parent_gpid,
}
}

// ThreadPool: THREAD_POOL_REPLICATION
void replica_split_manager::child_check_split_context() // on child partition
{
FAIL_POINT_INJECT_F("replica_child_check_split_context", [](dsn::string_view) {});

if (status() != partition_status::PS_PARTITION_SPLIT) {
derror_replica("wrong status({})", enum_to_string(status()));
_replica->_split_states.check_state_task = nullptr;
return;
}
// let parent partition check its status
error_code ec = _stub->split_replica_exec(
LPC_PARTITION_SPLIT,
_replica->_split_states.parent_gpid,
std::bind(&replica_split_manager::parent_check_states, std::placeholders::_1));
if (ec != ERR_OK) {
child_handle_split_error("check_child_state failed because parent gpid is invalid");
return;
}

_replica->_split_states.check_state_task =
tasking::enqueue(LPC_PARTITION_SPLIT,
tracker(),
std::bind(&replica_split_manager::child_check_split_context, this),
get_gpid().thread_hash(),
std::chrono::seconds(3));
}

// ThreadPool: THREAD_POOL_REPLICATION
bool replica_split_manager::parent_check_states() // on parent partition
{
Expand Down
2 changes: 2 additions & 0 deletions src/replica/split/replica_split_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ class replica_split_manager : replica_base

// return true if parent status is valid
bool parent_check_states();
// check if child status is valid
void child_check_split_context();

// parent reset child information when partition split failed
void parent_cleanup_split_context();
Expand Down
25 changes: 25 additions & 0 deletions src/replica/split/test/replica_split_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,21 @@ class replica_split_test : public replica_test_base
_parent_replica->tracker()->wait_outstanding_tasks();
}

// Test helper: creates an inactive child replica together with its split manager, runs
// `child_init_replica` on it, then cancels the freshly scheduled state checker and
// waits for the child's outstanding tasks.
void test_child_init_replica()
{
    _child_replica = stub->generate_replica(
        _app_info, CHILD_GPID, partition_status::PS_INACTIVE, INIT_BALLOT);
    _child_split_mgr = make_unique<replica_split_manager>(_child_replica.get());
    _child_split_mgr->child_init_replica(PARENT_GPID, PRIMARY, INIT_BALLOT);

    // check_state_task is enqueued with a 3-second delay, so cancel it immediately
    // instead of letting the test wait for it
    auto &check_task = _child_replica->_split_states.check_state_task;
    bool cancel_finished = false;
    check_task->cancel(false, &cancel_finished);
    if (cancel_finished) {
        check_task = nullptr;
    }

    _child_replica->tracker()->wait_outstanding_tasks();
}

bool test_parent_check_states()
{
bool flag = _parent_split_mgr->parent_check_states();
Expand Down Expand Up @@ -526,6 +541,16 @@ TEST_F(replica_split_test, parent_start_split_tests)
}
}

// child_init_replica test
// Verifies that after `child_init_replica` the child replica is in
// PS_PARTITION_SPLIT status and that both split progress flags are still false.
TEST_F(replica_split_test, child_init_replica_test)
{
// configure the fail point named "replica_stub_split_replica_exec" with action "return()"
// NOTE(review): presumably this short-circuits replica_stub::split_replica_exec so the
// test does not depend on a real parent replica - confirm against the fail point site.
fail::cfg("replica_stub_split_replica_exec", "return()");
test_child_init_replica();
ASSERT_EQ(_child_replica->status(), partition_status::PS_PARTITION_SPLIT);
// a freshly initialized child has neither copied the prepare list nor caught up
ASSERT_FALSE(child_is_prepare_list_copied());
ASSERT_FALSE(child_is_caught_up());
}

// parent_check_states tests
TEST_F(replica_split_test, parent_check_states_tests)
{
Expand Down