diff --git a/src/messaging/ExchangeMessageDispatch.cpp b/src/messaging/ExchangeMessageDispatch.cpp index 11707f895a3099..d683b10f47b9b0 100644 --- a/src/messaging/ExchangeMessageDispatch.cpp +++ b/src/messaging/ExchangeMessageDispatch.cpp @@ -50,7 +50,7 @@ CHIP_ERROR ExchangeMessageDispatch::SendMessage(SessionHandle session, uint16_t payloadHeader.SetExchangeID(exchangeId).SetMessageType(protocol, type).SetInitiator(isInitiator); // If there is a pending acknowledgment piggyback it on this message. - if (reliableMessageContext->IsAckPending()) + if (reliableMessageContext->HasPiggybackAckPending()) { payloadHeader.SetAckMessageCounter(reliableMessageContext->TakePendingPeerAckMessageCounter()); diff --git a/src/messaging/ReliableMessageContext.cpp b/src/messaging/ReliableMessageContext.cpp index cedd923495b062..fe45f08052b73c 100644 --- a/src/messaging/ReliableMessageContext.cpp +++ b/src/messaging/ReliableMessageContext.cpp @@ -136,6 +136,11 @@ CHIP_ERROR ReliableMessageContext::FlushAcks() return err; } +bool ReliableMessageContext::HasPiggybackAckPending() const +{ + return mFlags.Has(Flags::kFlagAckMessageCounterIsValid); +} + uint64_t ReliableMessageContext::GetInitialRetransmitTimeoutTick() { return mConfig.mInitialRetransTimeoutTick; @@ -213,6 +218,8 @@ CHIP_ERROR ReliableMessageContext::HandleNeedsAckInner(uint32_t messageCounter, // Is there pending ack for a different message counter. bool wasAckPending = IsAckPending() && mPendingPeerAckMessageCounter != messageCounter; + bool messageCounterWasValid = HasPiggybackAckPending(); + // Temporary store currently pending ack message counter (even if there is none). uint32_t tempAckMessageCounter = mPendingPeerAckMessageCounter; @@ -228,6 +235,16 @@ CHIP_ERROR ReliableMessageContext::HandleNeedsAckInner(uint32_t messageCounter, // Restore previously pending ack message counter. SetPendingPeerAckMessageCounter(tempAckMessageCounter); } + else if (messageCounterWasValid) + { + // Restore the previous value, so later piggybacks will pick it up, + // but don't set out "ack is pending" state, because we didn't use + // to have it set. + mPendingPeerAckMessageCounter = tempAckMessageCounter; + } + // Otherwise don't restore the invalid old mPendingPeerAckMessageCounter + // value, so we preserve the invariant that once we have had an ack + // pending we always have a valid mPendingPeerAckMessageCounter. return err; } @@ -296,6 +313,7 @@ void ReliableMessageContext::SetPendingPeerAckMessageCounter(uint32_t aPeerAckMe { mPendingPeerAckMessageCounter = aPeerAckMessageCounter; SetAckPending(true); + mFlags.Set(Flags::kFlagAckMessageCounterIsValid); } } // namespace Messaging diff --git a/src/messaging/ReliableMessageContext.h b/src/messaging/ReliableMessageContext.h index 410d1d01173bcb..606d7c781116c7 100644 --- a/src/messaging/ReliableMessageContext.h +++ b/src/messaging/ReliableMessageContext.h @@ -58,9 +58,10 @@ class ReliableMessageContext CHIP_ERROR FlushAcks(); /** - * Take the pending peer ack message counter from the context. This must only be called - * when IsAckPending() is true. After this call, IsAckPending() will be - * false; it's the caller's responsibility to send the ack. + * Take the pending peer ack message counter from the context. This must + * only be called when HasPiggybackAckPending() is true. After this call, + * IsAckPending() will be false; it's the caller's responsibility to send + * the ack. */ uint32_t TakePendingPeerAckMessageCounter() { @@ -68,6 +69,13 @@ class ReliableMessageContext return mPendingPeerAckMessageCounter; } + /** + * Check whether we have an ack to piggyback on the message we are sending. + * If true, TakePendingPeerAckMessageCounter will return a valid value that + * should be included as an ack in the message. + */ + bool HasPiggybackAckPending() const; + /** * Get the initial retransmission interval. It would be the time to wait before * retransmission after first failure. @@ -183,33 +191,39 @@ class ReliableMessageContext enum class Flags : uint16_t { /// When set, signifies that this context is the initiator of the exchange. - kFlagInitiator = 0x0001, + kFlagInitiator = (1u << 0), /// When set, signifies that a response is expected for a message that is being sent. - kFlagResponseExpected = 0x0002, + kFlagResponseExpected = (1u << 1), /// When set, automatically request an acknowledgment whenever a message is sent via UDP. - kFlagAutoRequestAck = 0x0004, + kFlagAutoRequestAck = (1u << 2), /// Internal and debug only: when set, the exchange layer does not send an acknowledgment. - kFlagDropAckDebug = 0x0008, + kFlagDropAckDebug = (1u << 3), /// When set, signifies current reliable message context is in usage. - kFlagOccupied = 0x0010, + kFlagOccupied = (1u << 4), /// When set, signifies that there is an acknowledgment pending to be sent back. - kFlagAckPending = 0x0020, + kFlagAckPending = (1u << 5), + + /// When set, signifies that there has once been an acknowledgment + /// pending to be sent back. In that case, + /// mPendingPeerAckMessageCounter is a valid message counter value for + /// some message we have needed to acknowledge in the past. + kFlagAckMessageCounterIsValid = (1u << 6), /// When set, signifies that at least one message has been received from peer on this exchange context. - kFlagMsgRcvdFromPeer = 0x0040, + kFlagMsgRcvdFromPeer = (1u << 7), /// When set, signifies that this exchange is waiting for a call to SendMessage. - kFlagWillSendMessage = 0x0080, + kFlagWillSendMessage = (1u << 8), /// When set, signifies that we are currently in the middle of HandleMessage. - kFlagHandlingMessage = 0x0100, + kFlagHandlingMessage = (1u << 9), /// When set, we have had Close() or Abort() called on us already. - kFlagClosed = 0x0200, + kFlagClosed = (1u << 10), }; BitFlags mFlags; // Internal state flags diff --git a/src/messaging/ReliableMessageMgr.cpp b/src/messaging/ReliableMessageMgr.cpp index 98a2a21985c34b..d8040b9fbef269 100644 --- a/src/messaging/ReliableMessageMgr.cpp +++ b/src/messaging/ReliableMessageMgr.cpp @@ -268,7 +268,16 @@ CHIP_ERROR ReliableMessageMgr::AddToRetransTable(ReliableMessageContext * rc, Re bool added = false; CHIP_ERROR err = CHIP_NO_ERROR; - VerifyOrDie(rc != nullptr && !rc->IsOccupied()); + VerifyOrDie(rc != nullptr); + + if (rc->IsOccupied()) + { + // This can happen if we have a misbehaving peer that is not sending + // acks with its application-level responses when it should, so we end + // up with two outstanding app-level messages both waiting for an ack. + // Just give up and error out in that case. + return CHIP_ERROR_INCORRECT_STATE; + } for (RetransTableEntry & entry : mRetransTable) { diff --git a/src/messaging/tests/TestReliableMessageProtocol.cpp b/src/messaging/tests/TestReliableMessageProtocol.cpp index a3989259e99e9e..6d4f864dee5a8f 100644 --- a/src/messaging/tests/TestReliableMessageProtocol.cpp +++ b/src/messaging/tests/TestReliableMessageProtocol.cpp @@ -76,7 +76,7 @@ class MockAppDelegate : public ExchangeDelegate if (mDropAckResponse) { auto * rc = ec->GetReliableMessageContext(); - if (rc->IsAckPending()) + if (rc->HasPiggybackAckPending()) { (void) rc->TakePendingPeerAckMessageCounter(); } @@ -798,7 +798,7 @@ void CheckReceiveAfterStandaloneAck(nlTestSuite * inSuite, void * inContext) NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 0); } -void CheckNoPiggybackAfterPiggyback(nlTestSuite * inSuite, void * inContext) +void CheckPiggybackAfterPiggyback(nlTestSuite * inSuite, void * inContext) { TestContext & ctx = *reinterpret_cast(inContext); @@ -912,10 +912,9 @@ void CheckNoPiggybackAfterPiggyback(nlTestSuite * inSuite, void * inContext) NL_TEST_ASSERT(inSuite, gLoopback.mSentMessageCount == 5); NL_TEST_ASSERT(inSuite, gLoopback.mDroppedMessageCount == 0); - // Ensure that we have received that response and it did not have a piggyback - // ack. + // Ensure that we have received that response and it had a piggyback ack. NL_TEST_ASSERT(inSuite, mockSender.IsOnMessageReceivedCalled); - NL_TEST_ASSERT(inSuite, !mockSender.mReceivedPiggybackAck); + NL_TEST_ASSERT(inSuite, mockSender.mReceivedPiggybackAck); err = ctx.GetExchangeManager().UnregisterUnsolicitedMessageHandlerForType(Echo::MsgType::EchoRequest); NL_TEST_ASSERT(inSuite, err == CHIP_NO_ERROR); @@ -1262,6 +1261,141 @@ void CheckLostResponseWithPiggyback(nlTestSuite * inSuite, void * inContext) NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 0); } +void CheckLostStandaloneAck(nlTestSuite * inSuite, void * inContext) +{ + /** + * This tests the following scenario: + * 1) A reliable message is sent from initiator to responder. + * 2) The responder sends a standalone ack, which is lost. + * 3) The responder sends an application-level response. + * 4) The initiator sends a reliable response to the app-level response. + * + * This should succeed, with all application-level messages being delivered + * and no crashes. + */ + TestContext & ctx = *reinterpret_cast(inContext); + + chip::System::PacketBufferHandle buffer = chip::MessagePacketBuffer::NewWithData(PAYLOAD, sizeof(PAYLOAD)); + NL_TEST_ASSERT(inSuite, !buffer.IsNull()); + + CHIP_ERROR err = CHIP_NO_ERROR; + + MockAppDelegate mockReceiver; + err = ctx.GetExchangeManager().RegisterUnsolicitedMessageHandlerForType(Echo::MsgType::EchoRequest, &mockReceiver); + NL_TEST_ASSERT(inSuite, err == CHIP_NO_ERROR); + + mockReceiver.mTestSuite = inSuite; + + MockAppDelegate mockSender; + ExchangeContext * exchange = ctx.NewExchangeToAlice(&mockSender); + NL_TEST_ASSERT(inSuite, exchange != nullptr); + + mockSender.mTestSuite = inSuite; + + ReliableMessageMgr * rm = ctx.GetExchangeManager().GetReliableMessageMgr(); + NL_TEST_ASSERT(inSuite, rm != nullptr); + + // Ensure the retransmit table is empty right now + NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 0); + + // We send a message, the other side sends a standalone ack first (which is + // lost), then an application response, then we respond to that response. + // We need to keep both exchanges alive for that (so we can send the + // response from the receiver and so the initial sender exchange can send a + // response to that). + gLoopback.mSentMessageCount = 0; + gLoopback.mNumMessagesToDrop = 0; + gLoopback.mDroppedMessageCount = 0; + mockReceiver.mRetainExchange = true; + mockSender.mRetainExchange = true; + + // And ensure the ack heading back our way is dropped. + mockReceiver.mDropAckResponse = true; + + err = exchange->SendMessage(Echo::MsgType::EchoRequest, std::move(buffer), SendFlags(SendMessageFlags::kExpectResponse)); + NL_TEST_ASSERT(inSuite, err == CHIP_NO_ERROR); + + // Ensure the message was sent. + NL_TEST_ASSERT(inSuite, gLoopback.mSentMessageCount == 1); + NL_TEST_ASSERT(inSuite, gLoopback.mDroppedMessageCount == 0); + + // And that it was received. + NL_TEST_ASSERT(inSuite, mockReceiver.IsOnMessageReceivedCalled); + + // And that we have not gotten any app-level responses or acks so far. + NL_TEST_ASSERT(inSuite, !mockSender.IsOnMessageReceivedCalled); + NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1); + + ReliableMessageContext * receiverRc = mockReceiver.mExchange->GetReliableMessageContext(); + // Ack should have been dropped. + NL_TEST_ASSERT(inSuite, !receiverRc->IsAckPending()); + + // Don't drop any more acks. + mockReceiver.mDropAckResponse = false; + + // Now send a message from the other side. + buffer = chip::MessagePacketBuffer::NewWithData(PAYLOAD, sizeof(PAYLOAD)); + NL_TEST_ASSERT(inSuite, !buffer.IsNull()); + + mockReceiver.mExchange->SendMessage(Echo::MsgType::EchoResponse, std::move(buffer), + SendFlags(SendMessageFlags::kExpectResponse)); + + // Ensure the response was sent. + NL_TEST_ASSERT(inSuite, gLoopback.mSentMessageCount == 2); + NL_TEST_ASSERT(inSuite, gLoopback.mDroppedMessageCount == 0); + + // Ensure that we have received that response and had a piggyback ack. + NL_TEST_ASSERT(inSuite, mockSender.IsOnMessageReceivedCalled); + NL_TEST_ASSERT(inSuite, mockSender.mReceivedPiggybackAck); + // We now have just the received message waiting for an ack. + NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1); + + // Reset various state so we can measure things again. + mockReceiver.IsOnMessageReceivedCalled = false; + mockReceiver.mReceivedPiggybackAck = false; + mockSender.IsOnMessageReceivedCalled = false; + mockSender.mReceivedPiggybackAck = false; + + // Stop retaining the recipient exchange. + mockReceiver.mRetainExchange = false; + + // Now send a new message to the other side. + buffer = chip::MessagePacketBuffer::NewWithData(PAYLOAD, sizeof(PAYLOAD)); + NL_TEST_ASSERT(inSuite, !buffer.IsNull()); + + err = exchange->SendMessage(Echo::MsgType::EchoRequest, std::move(buffer)); + NL_TEST_ASSERT(inSuite, err == CHIP_NO_ERROR); + + // Ensure the message and the standalone ack to it were sent. + NL_TEST_ASSERT(inSuite, gLoopback.mSentMessageCount == 4); + NL_TEST_ASSERT(inSuite, gLoopback.mDroppedMessageCount == 0); + + // And that it was received. + NL_TEST_ASSERT(inSuite, mockReceiver.IsOnMessageReceivedCalled); + NL_TEST_ASSERT(inSuite, mockReceiver.mReceivedPiggybackAck); + + // And that receiver has no ack pending. + NL_TEST_ASSERT(inSuite, !receiverRc->IsAckPending()); + + // And that there are no un-acked messages left. + NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 0); +} + +/** + * TODO: A test that we should have but can't write with the existing + * infrastructure we have: + * + * 1. A sends message 1 to B + * 2. B is slow to respond, A does a resend and the resend is delayed in the network. + * 3. B responds with message 2, which acks message 1. + * 4. A sends message 3 to B + * 5. B sends standalone ack to message 3, which is lost + * 6. The duplicate message from step 3 is delivered and triggers a standalone ack. + * 7. B responds with message 4, which should carry a piggyback ack for message 3 + * (this is the part that needs testing!) + * 8. A sends message 5 to B. + */ + // Test Suite /** @@ -1280,12 +1414,13 @@ const nlTest sTests[] = NL_TEST_DEF("Test ReliableMessageMgr::CheckDuplicateMessage", CheckDuplicateMessage), NL_TEST_DEF("Test ReliableMessageMgr::CheckDuplicateMessageClosedExchange", CheckDuplicateMessageClosedExchange), NL_TEST_DEF("Test that a reply after a standalone ack comes through correctly", CheckReceiveAfterStandaloneAck), - NL_TEST_DEF("Test that a reply to a non-MRP message does not piggyback an ack even if there were MRP things happening on the context before", CheckNoPiggybackAfterPiggyback), + NL_TEST_DEF("Test that a reply to a non-MRP message piggybacks an ack if there were MRP things happening on the context before", CheckPiggybackAfterPiggyback), NL_TEST_DEF("Test sending an unsolicited ack-soliciting 'standalone ack' message", CheckSendUnsolicitedStandaloneAckMessage), NL_TEST_DEF("Test ReliableMessageMgr::CheckSendStandaloneAckMessage", CheckSendStandaloneAckMessage), NL_TEST_DEF("Test command, response, default response, with receiver closing exchange after sending response", CheckMessageAfterClosed), NL_TEST_DEF("Test that unencrypted message is dropped if exchange requires encryption", CheckUnencryptedMessageReceiveFailure), NL_TEST_DEF("Test that dropping an application-level message with a piggyback ack works ok once both sides retransmit", CheckLostResponseWithPiggyback), + NL_TEST_DEF("Test that an application-level response-to-response after a lost standalone ack to the initial message works", CheckLostStandaloneAck), NL_TEST_SENTINEL() };