From ac9531306534f341f678ef04bd8b2944e4e1331c Mon Sep 17 00:00:00 2001 From: Arkadiusz Bokowy Date: Wed, 4 Jan 2023 17:36:46 +0100 Subject: [PATCH] Fix mutex use after destroy upon stack shutdown (#24191) * Fix mutex use after destroy upon stack shutdown Shutting down CHIP stack when the event loop is still running might lead to use after free for objects used in CHIP main event loop. This commit fixes this by not running pychip_DeviceController_StackShutdown() on the CHIP thread, but in the main thread of Python application. * Account for externally managed threads which are joined outside of the POSIX platform manager implementation class. * java: Stop IO thread before shutting down commissioner --- .../java/CHIPDeviceController-JNI.cpp | 34 +++++++++--- src/controller/python/chip/ChipStack.py | 5 +- .../GenericPlatformManagerImpl_POSIX.h | 19 +++---- .../GenericPlatformManagerImpl_POSIX.ipp | 52 ++++++++++++------- 4 files changed, 69 insertions(+), 41 deletions(-) diff --git a/src/controller/java/CHIPDeviceController-JNI.cpp b/src/controller/java/CHIPDeviceController-JNI.cpp index bed3ffda5f0c42..85c88c48eacf38 100644 --- a/src/controller/java/CHIPDeviceController-JNI.cpp +++ b/src/controller/java/CHIPDeviceController-JNI.cpp @@ -74,6 +74,7 @@ using namespace chip::Credentials; #define CDC_JNI_CALLBACK_LOCAL_REF_COUNT 256 static void * IOThreadMain(void * arg); +static CHIP_ERROR StopIOThread(); static CHIP_ERROR N2J_PaseVerifierParams(JNIEnv * env, jlong setupPincode, jbyteArray pakeVerifier, jobject & outParams); static CHIP_ERROR N2J_NetworkLocation(JNIEnv * env, jstring ipAddress, jint port, jint interfaceIndex, jobject & outLocation); static CHIP_ERROR GetChipPathIdValue(jobject chipPathId, uint32_t wildcardValue, uint32_t & outValue); @@ -147,14 +148,10 @@ void JNI_OnUnload(JavaVM * jvm, void * reserved) chip::DeviceLayer::StackLock lock; ChipLogProgress(Controller, "JNI_OnUnload() called"); - // If the IO thread has been started, shut it down and wait for it to exit. - if (sIOThread != PTHREAD_NULL) - { - chip::DeviceLayer::PlatformMgr().StopEventLoopTask(); - - chip::DeviceLayer::StackUnlock unlock; - pthread_join(sIOThread, NULL); - } + // If the IO thread has not been stopped yet, shut it down now. + // TODO(arkq): Maybe we should just assert here, as the IO thread + // should be stopped before the library is unloaded. + StopIOThread(); sJVM = NULL; @@ -1020,6 +1017,10 @@ JNI_METHOD(void, shutdownCommissioning) (JNIEnv * env, jobject self, jlong handle) { chip::DeviceLayer::StackLock lock; + + // Stop the IO thread, so that the controller can be safely shut down. + StopIOThread(); + AndroidDeviceControllerWrapper * wrapper = AndroidDeviceControllerWrapper::FromJNIHandle(handle); wrapper->Controller()->Shutdown(); } @@ -1410,6 +1411,23 @@ void * IOThreadMain(void * arg) return NULL; } +// NOTE: This function SHALL be called with the stack lock held. +CHIP_ERROR StopIOThread() +{ + if (sIOThread != PTHREAD_NULL) + { + ChipLogProgress(Controller, "IO thread stopping"); + chip::DeviceLayer::StackUnlock unlock; + + chip::DeviceLayer::PlatformMgr().StopEventLoopTask(); + + pthread_join(sIOThread, NULL); + sIOThread = PTHREAD_NULL; + } + + return CHIP_NO_ERROR; +} + CHIP_ERROR N2J_PaseVerifierParams(JNIEnv * env, jlong setupPincode, jbyteArray paseVerifier, jobject & outParams) { CHIP_ERROR err = CHIP_NO_ERROR; diff --git a/src/controller/python/chip/ChipStack.py b/src/controller/python/chip/ChipStack.py index 3ca313be08214d..fabf43e4110c5e 100644 --- a/src/controller/python/chip/ChipStack.py +++ b/src/controller/python/chip/ChipStack.py @@ -328,7 +328,10 @@ def setLogFunct(self, logFunct): self._ChipStackLib.pychip_Stack_SetLogFunct(logFunct) def Shutdown(self): - self.Call(lambda: self._ChipStackLib.pychip_DeviceController_StackShutdown()).raise_on_error() + # + # Terminate Matter thread and shutdown the stack. + # + self._ChipStackLib.pychip_DeviceController_StackShutdown() # # We only shutdown the persistent storage layer AFTER we've shut down the stack, diff --git a/src/include/platform/internal/GenericPlatformManagerImpl_POSIX.h b/src/include/platform/internal/GenericPlatformManagerImpl_POSIX.h index 7fd248fdf699d9..acdd2a4d3b762c 100644 --- a/src/include/platform/internal/GenericPlatformManagerImpl_POSIX.h +++ b/src/include/platform/internal/GenericPlatformManagerImpl_POSIX.h @@ -55,24 +55,19 @@ class GenericPlatformManagerImpl_POSIX : public GenericPlatformManagerImpl mState{ State::kStopped }; pthread_cond_t mEventQueueStoppedCond; pthread_mutex_t mStateLock; - // - // TODO: This variable is very similar to mMainLoopIsStarted, track the - // cleanup and consolidation in this issue: - // - bool mEventQueueHasStopped = false; - pthread_attr_t mChipTaskAttr; struct sched_param mChipTaskSchedParam; @@ -109,7 +104,7 @@ class GenericPlatformManagerImpl_POSIX : public GenericPlatformManagerImpl mShouldRunEventLoop; + std::atomic mShouldRunEventLoop{ true }; static void * EventLoopTaskMain(void * arg); }; diff --git a/src/include/platform/internal/GenericPlatformManagerImpl_POSIX.ipp b/src/include/platform/internal/GenericPlatformManagerImpl_POSIX.ipp index 4a52152d91d64f..6050950bf4c261 100644 --- a/src/include/platform/internal/GenericPlatformManagerImpl_POSIX.ipp +++ b/src/include/platform/internal/GenericPlatformManagerImpl_POSIX.ipp @@ -59,16 +59,12 @@ CHIP_ERROR GenericPlatformManagerImpl_POSIX::_InitChipStack() // Call up to the base class _InitChipStack() to perform the bulk of the initialization. ReturnErrorOnFailure(GenericPlatformManagerImpl::_InitChipStack()); - mShouldRunEventLoop.store(true, std::memory_order_relaxed); - int ret = pthread_cond_init(&mEventQueueStoppedCond, nullptr); VerifyOrReturnError(ret == 0, CHIP_ERROR_POSIX(ret)); ret = pthread_mutex_init(&mStateLock, nullptr); VerifyOrReturnError(ret == 0, CHIP_ERROR_POSIX(ret)); - mHasValidChipTask = false; - return CHIP_NO_ERROR; } @@ -117,7 +113,11 @@ void GenericPlatformManagerImpl_POSIX::_UnlockChipStack() template bool GenericPlatformManagerImpl_POSIX::_IsChipStackLockedByCurrentThread() const { - return !mHasValidChipTask || (mChipStackIsLocked && (pthread_equal(pthread_self(), mChipStackLockOwnerThread))); + // If no Matter thread is currently running we do not have to worry about + // locking. Hence, this function always returns true in that case. + if (mState.load(std::memory_order_relaxed) == State::kStopped) + return true; + return mChipStackIsLocked && (pthread_equal(pthread_self(), mChipStackLockOwnerThread)); } #endif @@ -153,18 +153,16 @@ void GenericPlatformManagerImpl_POSIX::_RunEventLoop() pthread_mutex_lock(&mStateLock); // - // If we haven't set mHasValidChipTask by now, it means that the application did not call StartEventLoopTask - // and consequently, are running the event loop from their own, externally managed task. - // Let's track his appropriately since we need this info later when stopping the event queues. + // If we haven't set mInternallyManagedChipTask by now, it means that the application did not call + // StartEventLoopTask and consequently, are running the event loop from their own, externally managed + // task. // - if (!mHasValidChipTask) + if (!mInternallyManagedChipTask) { - mHasValidChipTask = true; - mChipTask = pthread_self(); - mTaskType = kExternallyManagedTask; + mChipTask = pthread_self(); + mState.store(State::kRunning, std::memory_order_relaxed); } - mEventQueueHasStopped = false; pthread_mutex_unlock(&mStateLock); Impl()->LockChipStack(); @@ -187,7 +185,7 @@ void GenericPlatformManagerImpl_POSIX::_RunEventLoop() Impl()->UnlockChipStack(); pthread_mutex_lock(&mStateLock); - mEventQueueHasStopped = true; + mState.store(State::kStopping, std::memory_order_relaxed); pthread_mutex_unlock(&mStateLock); // @@ -195,6 +193,13 @@ void GenericPlatformManagerImpl_POSIX::_RunEventLoop() // StopEventLoopTask(). // pthread_cond_signal(&mEventQueueStoppedCond); + + // + // Mark event loop as truly stopped. After that line, we can not use any + // non-simple type member variables, because they can be destroyed by the + // Shutdown() method. + // + mState.store(State::kStopped, std::memory_order_relaxed); } template @@ -230,8 +235,8 @@ CHIP_ERROR GenericPlatformManagerImpl_POSIX::_StartEventLoopTask() err = pthread_create(&mChipTask, &mChipTaskAttr, EventLoopTaskMain, this); if (err == 0) { - mHasValidChipTask = true; - mTaskType = kInternallyManagedTask; + mInternallyManagedChipTask = true; + mState.store(State::kRunning, std::memory_order_relaxed); } pthread_mutex_unlock(&mStateLock); @@ -255,7 +260,8 @@ CHIP_ERROR GenericPlatformManagerImpl_POSIX::_StopEventLoopTask() // If we're calling this from a different thread than the one running chip, then // we need to wait till the event queue has completely stopped before proceeding. // - if (mHasValidChipTask && (pthread_equal(pthread_self(), mChipTask) == 0)) + auto isRunning = mState.load(std::memory_order_relaxed) == State::kRunning; + if (isRunning && (pthread_equal(pthread_self(), mChipTask) == 0)) { pthread_mutex_unlock(&mStateLock); @@ -269,7 +275,7 @@ CHIP_ERROR GenericPlatformManagerImpl_POSIX::_StopEventLoopTask() pthread_mutex_lock(&mStateLock); - while (!mEventQueueHasStopped) + while (mState.load(std::memory_order_relaxed) == State::kRunning) { err = pthread_cond_wait(&mEventQueueStoppedCond, &mStateLock); VerifyOrExit(err == 0, ); @@ -280,7 +286,7 @@ CHIP_ERROR GenericPlatformManagerImpl_POSIX::_StopEventLoopTask() // // Wait further for the thread to terminate if we had previously created it. // - if (mTaskType == kInternallyManagedTask) + if (mInternallyManagedChipTask) { err = pthread_join(mChipTask, nullptr); VerifyOrExit(err == 0, ); @@ -292,13 +298,19 @@ CHIP_ERROR GenericPlatformManagerImpl_POSIX::_StopEventLoopTask() } exit: - mHasValidChipTask = false; return CHIP_ERROR_POSIX(err); } template void GenericPlatformManagerImpl_POSIX::_Shutdown() { + // + // We cannot shutdown the stack while the event loop is still running. This can lead + // to use after free errors - here we are destroying mutex and condition variable that + // are still in use by the event loop! + // + VerifyOrDie(mState.load(std::memory_order_relaxed) == State::kStopped); + pthread_mutex_destroy(&mStateLock); pthread_cond_destroy(&mEventQueueStoppedCond);