From 0307df397ec13e61e98a88af277074507895ad93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Damian=20Kr=C3=B3lik?= <66667989+Damian-Nordic@users.noreply.github.com> Date: Mon, 10 Jan 2022 18:27:06 +0100 Subject: [PATCH] [mrp] Fix MRP after a recent refactoring (#13413) PR 11988 has broken StartTimer call in ReliableMessageMgr by passing a timestamp instead of a delay. Consequently, retransmissions stopped working. Also, it was not caught because unit tests call the timeout function explicitly instead of relying on the system layer. Fix the issue and enhance unit tests. Additionally, fix a crash on access to AsSecureSession() if a retransmission occurs during PASE or CASE. --- src/controller/CHIPDeviceController.cpp | 1 + src/messaging/ReliableMessageMgr.cpp | 7 +- src/messaging/tests/MessagingContext.h | 2 + .../tests/TestReliableMessageProtocol.cpp | 66 +++++++++---------- .../raw/tests/NetworkTestHelpers.cpp | 4 +- 5 files changed, 43 insertions(+), 37 deletions(-) diff --git a/src/controller/CHIPDeviceController.cpp b/src/controller/CHIPDeviceController.cpp index 1cda177019ed19..6ca2ba0719d2bd 100644 --- a/src/controller/CHIPDeviceController.cpp +++ b/src/controller/CHIPDeviceController.cpp @@ -272,6 +272,7 @@ void DeviceController::OnFirstMessageDeliveryFailed(const SessionHandle & sessio { VerifyOrReturn(mState == State::Initialized, ChipLogError(Controller, "OnFirstMessageDeliveryFailed was called in incorrect state")); + VerifyOrReturn(session->GetSessionType() == Transport::Session::SessionType::kSecure); UpdateDevice(session->AsSecureSession()->GetPeerNodeId()); } diff --git a/src/messaging/ReliableMessageMgr.cpp b/src/messaging/ReliableMessageMgr.cpp index 4c696e891d7cd1..4869be51e4dbc9 100644 --- a/src/messaging/ReliableMessageMgr.cpp +++ b/src/messaging/ReliableMessageMgr.cpp @@ -35,6 +35,8 @@ #include #include +using namespace chip::System::Clock::Literals; + namespace chip { namespace Messaging { @@ -308,7 +310,10 @@ void ReliableMessageMgr::StartTimer() #endif StopTimer(); - VerifyOrDie(mSystemLayer->StartTimer(nextWakeTime, Timeout, this) == CHIP_NO_ERROR); + + const System::Clock::Timestamp now = System::SystemClock().GetMonotonicTimestamp(); + const auto nextWakeDelay = (nextWakeTime > now) ? nextWakeTime - now : 0_ms; + VerifyOrDie(mSystemLayer->StartTimer(nextWakeDelay, Timeout, this) == CHIP_NO_ERROR); } else { diff --git a/src/messaging/tests/MessagingContext.h b/src/messaging/tests/MessagingContext.h index be14c0a3f172fc..768fde0abb26f5 100644 --- a/src/messaging/tests/MessagingContext.h +++ b/src/messaging/tests/MessagingContext.h @@ -185,6 +185,8 @@ class LoopbackMessagingContext : public MessagingContext TransportMgrBase & GetTransportMgr() { return mTransportManager; } + IOContext & GetIOContext() { return mIOContext; } + /* * For unit-tests that simulate end-to-end transmission and reception of messages in loopback mode, * this mode better replicates a real-functioning stack that correctly handles the processing diff --git a/src/messaging/tests/TestReliableMessageProtocol.cpp b/src/messaging/tests/TestReliableMessageProtocol.cpp index 981c019880ed72..4eaa0f6f5c9d77 100644 --- a/src/messaging/tests/TestReliableMessageProtocol.cpp +++ b/src/messaging/tests/TestReliableMessageProtocol.cpp @@ -227,9 +227,8 @@ void CheckResendApplicationMessage(nlTestSuite * inSuite, void * inContext) NL_TEST_ASSERT(inSuite, gLoopback.mDroppedMessageCount == 1); NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1); - // sleep 65 ms to trigger first re-transmit - chip::test_utils::SleepMillis(65); - ReliableMessageMgr::Timeout(&ctx.GetSystemLayer(), rm); + // Wait for the first re-transmit (should take 64ms) + ctx.GetIOContext().DriveIOUntil(1000_ms32, [] { return gLoopback.mSentMessageCount >= 2; }); ctx.DrainAndServiceIO(); // Ensure the retransmit message was dropped, and is still there in the retransmit table @@ -238,9 +237,8 @@ void CheckResendApplicationMessage(nlTestSuite * inSuite, void * inContext) NL_TEST_ASSERT(inSuite, gLoopback.mDroppedMessageCount == 2); NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1); - // sleep another 65 ms to trigger second re-transmit - chip::test_utils::SleepMillis(65); - ReliableMessageMgr::Timeout(&ctx.GetSystemLayer(), rm); + // Wait for the second re-transmit (should take 64ms) + ctx.GetIOContext().DriveIOUntil(1000_ms32, [] { return gLoopback.mSentMessageCount >= 3; }); ctx.DrainAndServiceIO(); // Ensure the retransmit message was NOT dropped, and the retransmit table is empty, as we should have gotten an ack @@ -290,9 +288,8 @@ void CheckCloseExchangeAndResendApplicationMessage(nlTestSuite * inSuite, void * NL_TEST_ASSERT(inSuite, gLoopback.mDroppedMessageCount == 1); NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1); - // sleep 65 ms to trigger first re-transmit - chip::test_utils::SleepMillis(65); - ReliableMessageMgr::Timeout(&ctx.GetSystemLayer(), rm); + // Wait for the first re-transmit (should take 64ms) + ctx.GetIOContext().DriveIOUntil(1000_ms32, [] { return gLoopback.mSentMessageCount >= 2; }); ctx.DrainAndServiceIO(); // Ensure the retransmit message was dropped, and is still there in the retransmit table @@ -301,9 +298,8 @@ void CheckCloseExchangeAndResendApplicationMessage(nlTestSuite * inSuite, void * NL_TEST_ASSERT(inSuite, gLoopback.mDroppedMessageCount == 2); NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1); - // sleep another 65 ms to trigger second re-transmit - chip::test_utils::SleepMillis(65); - ReliableMessageMgr::Timeout(&ctx.GetSystemLayer(), rm); + // Wait for the second re-transmit (should take 64ms) + ctx.GetIOContext().DriveIOUntil(1000_ms32, [] { return gLoopback.mSentMessageCount >= 3; }); ctx.DrainAndServiceIO(); // Ensure the retransmit message was NOT dropped, and the retransmit table is empty, as we should have gotten an ack @@ -349,9 +345,8 @@ void CheckFailedMessageRetainOnSend(nlTestSuite * inSuite, void * inContext) // Ensure the message was dropped NL_TEST_ASSERT(inSuite, gLoopback.mDroppedMessageCount == 1); - // sleep 65 ms to trigger first re-transmit - chip::test_utils::SleepMillis(65); - ReliableMessageMgr::Timeout(&ctx.GetSystemLayer(), rm); + // Wait for the first re-transmit (should take 64ms) + ctx.GetIOContext().DriveIOUntil(1000_ms32, [] { return gLoopback.mSentMessageCount >= 2; }); ctx.DrainAndServiceIO(); // Ensure the retransmit table is empty, as we did not provide a message to retain @@ -441,9 +436,8 @@ void CheckResendApplicationMessageWithPeerExchange(nlTestSuite * inSuite, void * NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1); NL_TEST_ASSERT(inSuite, !mockReceiver.IsOnMessageReceivedCalled); - // sleep 65 ms to trigger first re-transmit - chip::test_utils::SleepMillis(65); - ReliableMessageMgr::Timeout(&ctx.GetSystemLayer(), rm); + // Wait for the first re-transmit (should take 64ms) + ctx.GetIOContext().DriveIOUntil(1000_ms32, [] { return gLoopback.mSentMessageCount >= 2; }); ctx.DrainAndServiceIO(); // Ensure the retransmit message was not dropped, and is no longer in the retransmit table @@ -513,9 +507,8 @@ void CheckDuplicateMessageClosedExchange(nlTestSuite * inSuite, void * inContext err = ctx.GetExchangeManager().UnregisterUnsolicitedMessageHandlerForType(Echo::MsgType::EchoRequest); NL_TEST_ASSERT(inSuite, err == CHIP_NO_ERROR); - // sleep 65 ms to trigger first re-transmit - chip::test_utils::SleepMillis(65); - ReliableMessageMgr::Timeout(&ctx.GetSystemLayer(), rm); + // Wait for the first re-transmit and ack (should take 64ms) + ctx.GetIOContext().DriveIOUntil(1000_ms32, [] { return gLoopback.mSentMessageCount >= 3; }); ctx.DrainAndServiceIO(); // Ensure the retransmit message was sent and the ack was sent @@ -574,9 +567,8 @@ void CheckResendSessionEstablishmentMessageWithPeerExchange(nlTestSuite * inSuit NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1); NL_TEST_ASSERT(inSuite, !mockReceiver.IsOnMessageReceivedCalled); - // sleep 65 ms to trigger first re-transmit - chip::test_utils::SleepMillis(65); - ReliableMessageMgr::Timeout(&ctx.GetSystemLayer(), rm); + // Wait for the first re-transmit (should take 64ms) + inctx.GetIOContext().DriveIOUntil(1000_ms32, [] { return gLoopback.mSentMessageCount >= 2; }); inctx.DrainAndServiceIO(); // Ensure the retransmit message was not dropped, and is no longer in the retransmit table @@ -649,9 +641,8 @@ void CheckDuplicateMessage(nlTestSuite * inSuite, void * inContext) mockReceiver.mDropAckResponse = false; mockReceiver.mRetainExchange = false; - // sleep 65 ms to trigger first re-transmit - chip::test_utils::SleepMillis(65); - ReliableMessageMgr::Timeout(&ctx.GetSystemLayer(), rm); + // Wait for the first re-transmit and ack (should take 64ms) + ctx.GetIOContext().DriveIOUntil(1000_ms32, [] { return gLoopback.mSentMessageCount >= 3; }); ctx.DrainAndServiceIO(); // Ensure the retransmit message was sent and the ack was sent @@ -1186,9 +1177,8 @@ void CheckLostResponseWithPiggyback(nlTestSuite * inSuite, void * inContext) mockSender.IsOnMessageReceivedCalled = false; mockSender.mReceivedPiggybackAck = false; - // sleep 65 ms to trigger re-transmit from sender - chip::test_utils::SleepMillis(65); - ReliableMessageMgr::Timeout(&ctx.GetSystemLayer(), rm); + // Wait for re-transmit from sender and ack (should take 64ms) + ctx.GetIOContext().DriveIOUntil(1000_ms32, [] { return gLoopback.mSentMessageCount >= 4; }); ctx.DrainAndServiceIO(); // We resent our first message, which did not make it to the app-level @@ -1216,9 +1206,8 @@ void CheckLostResponseWithPiggyback(nlTestSuite * inSuite, void * inContext) NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 0); } - // sleep 65*3 ms to trigger re-transmit from receiver - chip::test_utils::SleepMillis(65 * 3); - ReliableMessageMgr::Timeout(&ctx.GetSystemLayer(), rm); + // Wait for re-transmit from receiver (should take 256ms) + ctx.GetIOContext().DriveIOUntil(1000_ms32, [] { return gLoopback.mSentMessageCount >= 6; }); ctx.DrainAndServiceIO(); // And now we've definitely resent our response message, which should show @@ -1355,6 +1344,14 @@ void CheckLostStandaloneAck(nlTestSuite * inSuite, void * inContext) NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 0); } +int InitializeTestCase(void * inContext) +{ + TestContext & ctx = *static_cast(inContext); + ctx.GetSessionAliceToBob()->AsSecureSession()->SetMRPConfig(gDefaultMRPConfig); + ctx.GetSessionBobToAlice()->AsSecureSession()->SetMRPConfig(gDefaultMRPConfig); + return SUCCESS; +} + /** * TODO: A test that we should have but can't write with the existing * infrastructure we have: @@ -1403,7 +1400,8 @@ nlTestSuite sSuite = "Test-CHIP-ReliableMessageProtocol", &sTests[0], TestContext::InitializeAsync, - TestContext::Finalize + TestContext::Finalize, + InitializeTestCase, }; // clang-format on diff --git a/src/transport/raw/tests/NetworkTestHelpers.cpp b/src/transport/raw/tests/NetworkTestHelpers.cpp index 02c08a7df72fe1..b29c33bc85de25 100644 --- a/src/transport/raw/tests/NetworkTestHelpers.cpp +++ b/src/transport/raw/tests/NetworkTestHelpers.cpp @@ -57,8 +57,8 @@ CHIP_ERROR IOContext::Shutdown() void IOContext::DriveIO() { - // Set the select timeout to 100ms - constexpr uint32_t kSleepTimeMilliseconds = 100; + // Set the select timeout to 10ms + constexpr uint32_t kSleepTimeMilliseconds = 10; ServiceEvents(kSleepTimeMilliseconds); }