Skip to content

Commit

Permalink
chip-repl hits the Code is unsafe/racy assert when BLE commissioning …
Browse files Browse the repository at this point in the history
…is started (#26338)

* chip-repl hits the Code is unsafe/racy assert when BLE commissioning is started

* Add lock/unlock to CancelTimer

* Fix a few other issues

* Restyle

* Small fix

* Last few fixes
  • Loading branch information
tehampson authored May 5, 2023
1 parent 8b05d8e commit da37e59
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 14 deletions.
4 changes: 2 additions & 2 deletions src/controller/python/chip/ble/library_handle.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from ctypes import c_bool, c_char_p, c_uint32, c_void_p, py_object

import chip.native
from chip.ble.types import DeviceScannedCallback, ScanDoneCallback
from chip.ble.types import DeviceScannedCallback, ScanDoneCallback, ScanErrorCallback


# This prevents python auto-casting c_void_p to integers and
Expand Down Expand Up @@ -58,7 +58,7 @@ def _GetBleLibraryHandle() -> ctypes.CDLL:
VoidPointer, [VoidPointer])

setter.Set('pychip_ble_start_scanning', VoidPointer, [
py_object, VoidPointer, c_uint32, DeviceScannedCallback, ScanDoneCallback
py_object, VoidPointer, c_uint32, DeviceScannedCallback, ScanDoneCallback, ScanErrorCallback
])

return handle
23 changes: 19 additions & 4 deletions src/controller/python/chip/ble/scan_devices.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from typing import Generator

from chip.ble.library_handle import _GetBleLibraryHandle
from chip.ble.types import DeviceScannedCallback, ScanDoneCallback
from chip.ble.types import DeviceScannedCallback, ScanDoneCallback, ScanErrorCallback


@DeviceScannedCallback
Expand All @@ -34,7 +34,12 @@ def ScanDoneCallback(closure):
closure.ScanCompleted()


def DiscoverAsync(timeoutMs: int, scanCallback, doneCallback, adapter=None):
@ScanErrorCallback
def ScanErrorCallback(closure, errorCode: int):
closure.ScanErrorCallback(errorCode)


def DiscoverAsync(timeoutMs: int, scanCallback, doneCallback, errorCallback, adapter=None):
"""Initiate a BLE discovery of devices with the given timeout.
NOTE: devices are not guaranteed to be unique. New entries are returned
Expand All @@ -44,6 +49,7 @@ def DiscoverAsync(timeoutMs: int, scanCallback, doneCallback, adapter=None):
timeoutMs: scan will complete after this time
scanCallback: callback when a device is found
doneCallback: callback when the scan is complete
errorCallback: callback when error occurred during scan
adapter: what adapter to choose. Either an AdapterInfo object or
a string with the adapter address. If None, the first
adapter on the system is used.
Expand Down Expand Up @@ -72,14 +78,17 @@ def ScanCompleted(self, *args):
doneCallback(*args)
ctypes.pythonapi.Py_DecRef(ctypes.py_object(self))

def ScanErrorCallback(self, *args):
errorCallback(*args)

closure = ScannerClosure()
ctypes.pythonapi.Py_IncRef(ctypes.py_object(closure))

scanner = handle.pychip_ble_start_scanning(
ctypes.py_object(closure),
handle.pychip_ble_adapter_list_get_raw_adapter(
nativeList), timeoutMs,
ScanFoundCallback, ScanDoneCallback)
ScanFoundCallback, ScanDoneCallback, ScanErrorCallback)

if scanner == 0:
raise Exception('Failed to initiate scan')
Expand Down Expand Up @@ -113,6 +122,12 @@ def DeviceFound(self, address, discriminator, vendor, product):
def ScanCompleted(self):
self.queue.put(None)

def ScanError(self, errorCode):
# TODO need to determine what we do with this error. Most of the time this
# error is just a timeout introduced in PR #24873, right before we get a
# ScanCompleted.
pass


def DiscoverSync(timeoutMs: int, adapter=None) -> Generator[DeviceInfo, None, None]:
"""Discover BLE devices over the specified period of time.
Expand All @@ -131,7 +146,7 @@ def DiscoverSync(timeoutMs: int, adapter=None) -> Generator[DeviceInfo, None, No

receiver = _DeviceInfoReceiver()
DiscoverAsync(timeoutMs, receiver.DeviceFound,
receiver.ScanCompleted, adapter)
receiver.ScanCompleted, receiver.ScanError, adapter)

while True:
data = receiver.queue.get()
Expand Down
2 changes: 2 additions & 0 deletions src/controller/python/chip/ble/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,5 @@
c_uint16, c_uint16)

ScanDoneCallback = CFUNCTYPE(None, py_object)

ScanErrorCallback = CFUNCTYPE(None, py_object, c_uint16)
28 changes: 20 additions & 8 deletions src/platform/Linux/bluez/ChipDeviceScanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,16 @@ ChipDeviceScanner::~ChipDeviceScanner()
{
StopScan();

// In case the timeout timer is still active
chip::DeviceLayer::SystemLayer().CancelTimer(TimerExpiredCallback, this);
// mTimerExpired should only be set to true in the TimerExpiredCallback, which means we are in that callback
// right now so there is no need to cancel the timer. Doing so would result in deadlock trying to acquire the
// chip stack lock which we already currently have.
if (!mTimerExpired)
{
// In case the timeout timer is still active
DeviceLayer::PlatformMgr().LockChipStack();
chip::DeviceLayer::SystemLayer().CancelTimer(TimerExpiredCallback, this);
DeviceLayer::PlatformMgr().UnlockChipStack();

This comment has been minimized.

Copy link
@tianfeng-yang

tianfeng-yang May 6, 2023

Contributor

A deadlock can occur here.

#if CHIP_STACK_LOCK_TRACKING_ENABLED
        if (!DeviceLayer::PlatformMgr().IsChipStackLockedByCurrentThread())
        {
            DeviceLayer::PlatformMgr().LockChipStack();
            chip::DeviceLayer::SystemLayer().CancelTimer(TimerExpiredCallback, this);
            DeviceLayer::PlatformMgr().UnlockChipStack();
        }
        else
#endif
         chip::DeviceLayer::SystemLayer().CancelTimer(TimerExpiredCallback, this);
}

g_object_unref(mManager);
g_object_unref(mCancellable);
Expand Down Expand Up @@ -136,21 +144,25 @@ CHIP_ERROR ChipDeviceScanner::StartScan(System::Clock::Timeout timeout)
return CHIP_ERROR_INTERNAL;
}

DeviceLayer::PlatformMgr().LockChipStack();
CHIP_ERROR err = chip::DeviceLayer::SystemLayer().StartTimer(timeout, TimerExpiredCallback, static_cast<void *>(this));
DeviceLayer::PlatformMgr().UnlockChipStack();

This comment has been minimized.

Copy link
@tianfeng-yang

tianfeng-yang May 6, 2023

Contributor

A deadlock can occur here.

    CHIP_ERROR err = CHIP_NO_ERROR;
#if CHIP_STACK_LOCK_TRACKING_ENABLED
    if (!DeviceLayer::PlatformMgr().IsChipStackLockedByCurrentThread())
    {
        DeviceLayer::PlatformMgr().LockChipStack();
        err = chip::DeviceLayer::SystemLayer().StartTimer(timeout, TimerExpiredCallback, static_cast<void *>(this));
        DeviceLayer::PlatformMgr().UnlockChipStack();
    }
    else
#endif
    err = chip::DeviceLayer::SystemLayer().StartTimer(timeout, TimerExpiredCallback, static_cast<void *>(this));

This comment has been minimized.

Copy link
@tehampson

tehampson May 8, 2023

Author Contributor

@tianfeng-yang , sorry about that.

I can make these fixes this morning, but I would like to better understand why. Can you explain how you are reproducing the issue?

This comment has been minimized.

Copy link
@tehampson

tehampson May 8, 2023

Author Contributor

I do feel like checking DeviceLayer::PlatformMgr().IsChipStackLockedByCurrentThread() might be an anti-pattern at the moment. So please let me know how you reproduce this issue. I think this would be better tracked as an issue

This comment has been minimized.

Copy link
@tehampson

tehampson May 8, 2023

Author Contributor

Currently have a draft PR over here #26418

This comment has been minimized.

Copy link
@tianfeng-yang

tianfeng-yang May 10, 2023

Contributor
  1. git checkout da37e59
  2. comment out the code at src/platform/Linux/PlatformManagerImpl.cpp:218 // there is another issue, #25960
  3. scripts/run_in_build_env.sh './scripts/build_python.sh --install_wheel build-env -d true' && source ./scripts/activate.sh
  4. install dependencies: pip3 install PyQt5 && sudo apt install python3-pyqt5
  5. run scripts: python3 test_ble_commission.py // Sent to your slack
  6. an application will pop up, with a commission button
  7. click the button; the application will deadlock.
  8. attach to deadlocked process with gdb and dump backtraces of all threads gdb -p <PID> -ex 'thread apply all bt'

the gdb log in my case:

GNU gdb (Ubuntu 12.1-0ubuntu1~22.04) 12.1
Copyright (C) 2022 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
Type "show copying" and "show warranty" for details.
This GDB was configured as "x86_64-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<https://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
    <http://www.gnu.org/software/gdb/documentation/>.

For help, type "help".
Type "apropos word" to search for commands related to "word".
Attaching to process 141645
[New LWP 141646]
[New LWP 141647]
[New LWP 141648]
[New LWP 141649]
[New LWP 141650]
[New LWP 141651]
[New LWP 141652]
[New LWP 141653]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
0x00007f1844518d7f in __GI___poll (fds=0x561c913bc100, nfds=2, timeout=8274) at ../sysdeps/unix/sysv/linux/poll.c:29
29      ../sysdeps/unix/sysv/linux/poll.c: No such file or directory.

Thread 9 (Thread 0x7f1824802640 (LWP 141653) "gdbus"):
#0  0x00007f1844518d7f in __GI___poll (fds=0x561c9109b8f0, nfds=2, timeout=-1) at ../sysdeps/unix/sysv/linux/poll.c:29
#1  0x00007f1840570666 in  () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
#2  0x00007f184051b2b3 in g_main_loop_run () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
#3  0x00007f183633b07a in  () at /lib/x86_64-linux-gnu/libgio-2.0.so.0
#4  0x00007f184054aa51 in  () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
#5  0x00007f1844494b43 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
#6  0x00007f1844526a00 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81

Thread 8 (Thread 0x7f1825003640 (LWP 141652) "gmain"):
#0  0x00007f1844518d7f in __GI___poll (fds=0x561c90fea710, nfds=1, timeout=-1) at ../sysdeps/unix/sysv/linux/poll.c:29
#1  0x00007f1840570666 in  () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
#2  0x00007f18405193e3 in g_main_context_iteration () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
#3  0x00007f1840519431 in  () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
#4  0x00007f184054aa51 in  () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
#5  0x00007f1844494b43 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
#6  0x00007f1844526a00 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
--Type <RET> for more, q to quit, c to continue without paging--

Thread 7 (Thread 0x7f1825804640 (LWP 141651) "Thread (pooled)"):
#0  __futex_abstimed_wait_common64 (private=<optimized out>, cancel=true, abstime=0x7f1825802de0, op=137, expected=0, futex_word=0x7f18100030d0) at ./nptl/futex-internal.c:57
#1  __futex_abstimed_wait_common (cancel=true, private=<optimized out>, abstime=0x7f1825802de0, clockid=0, expected=0, futex_word=0x7f18100030d0) at ./nptl/futex-internal.c:87
#2  __GI___futex_abstimed_wait_cancelable64 (futex_word=futex_word@entry=0x7f18100030d0, expected=expected@entry=0, clockid=clockid@entry=1, abstime=abstime@entry=0x7f1825802de0, private=<optimized out>) at ./nptl/futex-internal.c:139
#3  0x00007f184449bf50 in do_futex_wait (sem=sem@entry=0x7f18100030d0, clockid=clockid@entry=1, abstime=abstime@entry=0x7f1825802de0) at ./nptl/sem_waitcommon.c:111
#4  0x00007f184449bffe in __new_sem_wait_slow64 (sem=0x7f18100030d0, clockid=1, abstime=0x7f1825802de0) at ./nptl/sem_waitcommon.c:183
#5  0x0000561c8ecec09b in PyThread_acquire_lock_timed ()
#6  0x0000561c8ed44464 in  ()
#7  0x0000561c8ed2f0a9 in  ()
#8  0x0000561c8ed168cb in _PyEval_EvalFrameDefault ()
#9  0x0000561c8ed2e1ec in _PyFunction_Vectorcall ()
#10 0x0000561c8ed168cb in _PyEval_EvalFrameDefault ()
#11 0x0000561c8ed2e1ec in _PyFunction_Vectorcall ()
#12 0x0000561c8ed168cb in _PyEval_EvalFrameDefault ()
#13 0x0000561c8ed2e1ec in _PyFunction_Vectorcall ()
#14 0x0000561c8ed168cb in _PyEval_EvalFrameDefault ()
#15 0x0000561c8ed2e1ec in _PyFunction_Vectorcall ()
#16 0x0000561c8ed168cb in _PyEval_EvalFrameDefault ()
#17 0x0000561c8ed2e1ec in _PyFunction_Vectorcall ()
#18 0x0000561c8ed168cb in _PyEval_EvalFrameDefault ()
#19 0x0000561c8ed2e1ec in _PyFunction_Vectorcall ()
#20 0x0000561c8ed18af0 in _PyEval_EvalFrameDefault ()
#21 0x0000561c8ed3be91 in  ()
#22 0x00007f1840082974 in call_method (va=0x7f1825803c20, fmt=0x7f1837e8142d "", method=0x7f1841549900) at siplib.c:2264
#23 sip_api_call_procedure_method (gil_state=PyGILState_UNLOCKED, error_handler=0x7f1837cf0b30 <sipVEH_QtCore_PyQt5(_sipSimpleWrapper*, PyGILState_STATE)>, py_self=0x7f183441c790, method=0x7f1841549900, fmt=0x7f1837e8142d "") at siplib.c:2286
#24 0x00007f1837dc9dfd in sipQRunnable::run() () at /home/yzm157/workspaces/github/connectedhomeip/.environment/pigweed-venv/lib/python3.10/site-packages/PyQt5/QtCore.abi3.so
#25 0x00007f183a4b766a in QThreadPoolThread::run() () at /home/yzm157/workspaces/github/connectedhomeip/.environment/pigweed-venv/lib/python3.10/site-packages/PyQt5/Qt5/lib/libQt5Core.so.5
#26 0x00007f183a4b3b35 in QThreadPrivate::start(void*) () at /home/yzm157/workspaces/github/connectedhomeip/.environment/pigweed-venv/lib/python3.10/site-packages/PyQt5/Qt5/lib/libQt5Core.so.5
#27 0x00007f1844494b43 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
#28 0x00007f1844526a00 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81

Thread 6 (Thread 0x7f1827fff640 (LWP 141650) "python3"):
#0  futex_wait (private=0, expected=2, futex_word=0x7f18356afd60 <chip::DeviceLayer::PlatformManagerImpl::sInstance+32>) at ../sysdeps/nptl/futex-internal.h:1--Type <RET> for more, q to quit, c to continue without paging--
46
#1  __GI___lll_lock_wait (futex=futex@entry=0x7f18356afd60 <chip::DeviceLayer::PlatformManagerImpl::sInstance+32>, private=0) at ./nptl/lowlevellock.c:49
#2  0x00007f1844498082 in lll_mutex_lock_optimized (mutex=0x7f18356afd60 <chip::DeviceLayer::PlatformManagerImpl::sInstance+32>) at ./nptl/pthread_mutex_lock.c:48
#3  ___pthread_mutex_lock (mutex=0x7f18356afd60 <chip::DeviceLayer::PlatformManagerImpl::sInstance+32>) at ./nptl/pthread_mutex_lock.c:93
#4  0x00007f1835503a26 in chip::DeviceLayer::Internal::GenericPlatformManagerImpl_POSIX<chip::DeviceLayer::PlatformManagerImpl>::_LockChipStack() (this=0x7f18356afd50 <chip::DeviceLayer::PlatformManagerImpl::sInstance+16>) at ../../src/include/platform/internal/GenericPlatformManagerImpl_POSIX.ipp:76
#5  0x00007f183528ad1e in chip::DeviceLayer::PlatformManager::LockChipStack() (this=0x7f18356afd40 <chip::DeviceLayer::PlatformManagerImpl::sInstance>) at ../../src/include/platform/PlatformManager.h:453
#6  0x00007f183550b3d8 in chip::DeviceLayer::Internal::ChipDeviceScanner::StartScan(std::chrono::duration<unsigned int, std::ratio<1l, 1000l> >) (this=0x7f1820038b50, timeout=...) at ../../src/platform/Linux/bluez/ChipDeviceScanner.cpp:147
#7  0x00007f18354f3ec1 in chip::DeviceLayer::Internal::BLEManagerImpl::InitiateScan(chip::DeviceLayer::Internal::BleScanState) (this=0x7f18356af940 <chip::DeviceLayer::Internal::BLEManagerImpl::sInstance>, scanType=chip::DeviceLayer::Internal::BleScanState::kScanForDiscriminator) at ../../src/platform/Linux/BLEManagerImpl.cpp:698
#8  0x00007f18354f4011 in chip::DeviceLayer::Internal::BLEManagerImpl::InitiateScan(long) (arg=1) at ../../src/platform/Linux/BLEManagerImpl.cpp:718
#9  0x00007f18355036b5 in chip::DeviceLayer::Internal::GenericPlatformManagerImpl<chip::DeviceLayer::PlatformManagerImpl>::_DispatchEvent(chip::DeviceLayer::ChipDeviceEvent const*) (this=0x7f18356afd50 <chip::DeviceLayer::PlatformManagerImpl::sInstance+16>, event=0x7f1827ffedc0) at ../../src/include/platform/internal/GenericPlatformManagerImpl.ipp:290
#10 0x00007f18355028d1 in chip::DeviceLayer::PlatformManager::DispatchEvent(chip::DeviceLayer::ChipDeviceEvent const*) (this=0x7f18356afd40 <chip::DeviceLayer::PlatformManagerImpl::sInstance>, event=0x7f1827ffedc0) at ../../src/include/platform/PlatformManager.h:505
#11 0x00007f18355042c1 in chip::DeviceLayer::Internal::GenericPlatformManagerImpl_POSIX<chip::DeviceLayer::PlatformManagerImpl>::ProcessDeviceEvents() (this=0x7f18356afd50 <chip::DeviceLayer::PlatformManagerImpl::sInstance+16>) at ../../src/include/platform/internal/GenericPlatformManagerImpl_POSIX.ipp:148
#12 0x00007f1835503cd8 in chip::DeviceLayer::Internal::GenericPlatformManagerImpl_POSIX<chip::DeviceLayer::PlatformManagerImpl>::_RunEventLoop() (this=0x7f18356afd50 <chip::DeviceLayer::PlatformManagerImpl::sInstance+16>) at ../../src/include/platform/internal/GenericPlatformManagerImpl_POSIX.ipp:183
#13 0x00007f1835502844 in chip::DeviceLayer::PlatformManager::RunEventLoop() (this=0x7f18356afd40 <chip::DeviceLayer::PlatformManagerImpl::sInstance>) at ../../src/include/platform/PlatformManager.h:405
#14 0x00007f1835504341 in chip::DeviceLayer::Internal::GenericPlatformManagerImpl_POSIX<chip::DeviceLayer::PlatformManagerImpl>::EventLoopTaskMain(void*) (arg=0x7f18356afd50 <chip::DeviceLayer::PlatformManagerImpl::sInstance+16>) at ../../src/include/platform/internal/GenericPlatformManagerImpl_POSIX.ipp:211
#15 0x00007f1844494b43 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
#16 0x00007f1844526a00 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81

Thread 5 (Thread 0x7f1834dff640 (LWP 141649) "gmain-matter"):
#0  syscall () at ../sysdeps/unix/sysv/linux/x86_64/syscall.S:38
#1  0x00007f184056a1d3 in g_cond_wait () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
#2  0x00007f1840519235 in  () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
#3  0x00007f184051b318 in g_main_loop_run () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
#4  0x00007f1835504563 in chip::DeviceLayer::(anonymous namespace)::GLibMainLoopThread(void*) (loop=0x561c90fda600) at ../../src/platform/Linux/PlatformManagerImpl.cpp:61
#5  0x00007f184054aa51 in  () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
#6  0x00007f1844494b43 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
#7  0x00007f1844526a00 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81

--Type <RET> for more, q to quit, c to continue without paging--
Thread 4 (Thread 0x7f1835fff640 (LWP 141648) "QDBusConnection"):
#0  0x00007f1844518d7f in __GI___poll (fds=0x7f1828004ed0, nfds=2, timeout=-1) at ../sysdeps/unix/sysv/linux/poll.c:29
#1  0x00007f1840570666 in  () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
#2  0x00007f18405193e3 in g_main_context_iteration () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
#3  0x00007f183a6f91cc in QEventDispatcherGlib::processEvents(QFlags<QEventLoop::ProcessEventsFlag>) () at /home/yzm157/workspaces/github/connectedhomeip/.environment/pigweed-venv/lib/python3.10/site-packages/PyQt5/Qt5/lib/libQt5Core.so.5
#4  0x00007f183a69c21a in QEventLoop::exec(QFlags<QEventLoop::ProcessEventsFlag>) () at /home/yzm157/workspaces/github/connectedhomeip/.environment/pigweed-venv/lib/python3.10/site-packages/PyQt5/Qt5/lib/libQt5Core.so.5
#5  0x00007f183a4b2844 in QThread::exec() () at /home/yzm157/workspaces/github/connectedhomeip/.environment/pigweed-venv/lib/python3.10/site-packages/PyQt5/Qt5/lib/libQt5Core.so.5
#6  0x00007f1836c15fd5 in QDBusConnectionManager::run() () at /home/yzm157/workspaces/github/connectedhomeip/.environment/pigweed-venv/lib/python3.10/site-packages/PyQt5/Qt5/plugins/platforms/../../lib/libQt5DBus.so.5
#7  0x00007f183a4b3b35 in QThreadPrivate::start(void*) () at /home/yzm157/workspaces/github/connectedhomeip/.environment/pigweed-venv/lib/python3.10/site-packages/PyQt5/Qt5/lib/libQt5Core.so.5
#8  0x00007f1844494b43 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
#9  0x00007f1844526a00 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81

Thread 3 (Thread 0x7f1836bff640 (LWP 141647) "QXcbEventQueue"):
#0  0x00007f1844518d7f in __GI___poll (fds=0x7f1836bfeca8, nfds=1, timeout=-1) at ../sysdeps/unix/sysv/linux/poll.c:29
#1  0x00007f18400a27e2 in  () at /lib/x86_64-linux-gnu/libxcb.so.1
#2  0x00007f18400a422c in xcb_wait_for_event () at /lib/x86_64-linux-gnu/libxcb.so.1
#3  0x00007f18370647b0 in QXcbEventQueue::run() () at /home/yzm157/workspaces/github/connectedhomeip/.environment/pigweed-venv/lib/python3.10/site-packages/PyQt5/Qt5/plugins/platforms/../../lib/libQt5XcbQpa.so.5
#4  0x00007f183a4b3b35 in QThreadPrivate::start(void*) () at /home/yzm157/workspaces/github/connectedhomeip/.environment/pigweed-venv/lib/python3.10/site-packages/PyQt5/Qt5/lib/libQt5Core.so.5
#5  0x00007f1844494b43 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
#6  0x00007f1844526a00 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81

Thread 2 (Thread 0x7f184145c640 (LWP 141646) "python3"):
#0  __futex_abstimed_wait_common64 (private=<optimized out>, cancel=true, abstime=0x7f184145b320, op=137, expected=0, futex_word=0x7f183c000dc0) at ./nptl/futex-internal.c:57
#1  __futex_abstimed_wait_common (cancel=true, private=<optimized out>, abstime=0x7f184145b320, clockid=0, expected=0, futex_word=0x7f183c000dc0) at ./nptl/futex-internal.c:87
#2  __GI___futex_abstimed_wait_cancelable64 (futex_word=futex_word@entry=0x7f183c000dc0, expected=expected@entry=0, clockid=clockid@entry=1, abstime=abstime@entry=0x7f184145b320, private=<optimized out>) at ./nptl/futex-internal.c:139
#3  0x00007f184449bf50 in do_futex_wait (sem=sem@entry=0x7f183c000dc0, clockid=clockid@entry=1, abstime=abstime@entry=0x7f184145b320) at ./nptl/sem_waitcommon.c:111
#4  0x00007f184449bffe in __new_sem_wait_slow64 (sem=0x7f183c000dc0, clockid=1, abstime=0x7f184145b320) at ./nptl/sem_waitcommon.c:183
#5  0x0000561c8ecec09b in PyThread_acquire_lock_timed ()
#6  0x0000561c8ed44464 in  ()
#7  0x0000561c8ed2f0a9 in  ()
#8  0x0000561c8ed168cb in _PyEval_EvalFrameDefault ()
--Type <RET> for more, q to quit, c to continue without paging--
#9  0x0000561c8ed2e1ec in _PyFunction_Vectorcall ()
#10 0x0000561c8ed168cb in _PyEval_EvalFrameDefault ()
#11 0x0000561c8ed3be91 in  ()
#12 0x0000561c8ed18af0 in _PyEval_EvalFrameDefault ()
#13 0x0000561c8ed2e1ec in _PyFunction_Vectorcall ()
#14 0x0000561c8ed168cb in _PyEval_EvalFrameDefault ()
#15 0x0000561c8ed2e1ec in _PyFunction_Vectorcall ()
#16 0x0000561c8ed168cb in _PyEval_EvalFrameDefault ()
#17 0x0000561c8ed3be91 in  ()
#18 0x0000561c8ee67e5b in  ()
#19 0x0000561c8ee5df58 in  ()
#20 0x00007f1844494b43 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
#21 0x00007f1844526a00 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81

Thread 1 (Thread 0x7f184472a000 (LWP 141645) "python3"):
#0  0x00007f1844518d7f in __GI___poll (fds=0x561c913bc100, nfds=2, timeout=8274) at ../sysdeps/unix/sysv/linux/poll.c:29
#1  0x00007f1840570666 in  () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
#2  0x00007f18405193e3 in g_main_context_iteration () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
#3  0x00007f183a6f91cc in QEventDispatcherGlib::processEvents(QFlags<QEventLoop::ProcessEventsFlag>) () at /home/yzm157/workspaces/github/connectedhomeip/.environment/pigweed-venv/lib/python3.10/site-packages/PyQt5/Qt5/lib/libQt5Core.so.5
#4  0x00007f183a69c21a in QEventLoop::exec(QFlags<QEventLoop::ProcessEventsFlag>) () at /home/yzm157/workspaces/github/connectedhomeip/.environment/pigweed-venv/lib/python3.10/site-packages/PyQt5/Qt5/lib/libQt5Core.so.5
#5  0x00007f183a6a51d3 in QCoreApplication::exec() () at /home/yzm157/workspaces/github/connectedhomeip/.environment/pigweed-venv/lib/python3.10/site-packages/PyQt5/Qt5/lib/libQt5Core.so.5
#6  0x00007f1840842cc1 in meth_QApplication_exec () at /home/yzm157/workspaces/github/connectedhomeip/.environment/pigweed-venv/lib/python3.10/site-packages/PyQt5/QtWidgets.abi3.so
#7  0x0000561c8ed2d9c8 in  ()
#8  0x0000561c8ed244ab in _PyObject_MakeTpCall ()
#9  0x0000561c8ed1ce66 in _PyEval_EvalFrameDefault ()
#10 0x0000561c8ed12ed6 in  ()
#11 0x0000561c8ee09366 in PyEval_EvalCode ()
#12 0x0000561c8ee36108 in  ()
#13 0x0000561c8ee2ef5b in  ()
#14 0x0000561c8ee35e55 in  ()
#15 0x0000561c8ee35338 in _PyRun_SimpleFileObject ()
#16 0x0000561c8ee35033 in _PyRun_AnyFileObject ()
#17 0x0000561c8ee262de in Py_RunMain ()
#18 0x0000561c8edfc32d in Py_BytesMain ()
#19 0x00007f1844429d90 in __libc_start_call_main (main=main@entry=0x561c8edfc2f0, argc=argc@entry=2, argv=argv@entry=0x7ffd40c65808) at ../sysdeps/nptl/libc_start_call_main.h:58
#20 0x00007f1844429e40 in __libc_start_main_impl (main=0x561c8edfc2f0, argc=2, argv=0x7ffd40c65808, init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>, stack_end=0x7ffd40c657f8) at ../csu/libc-start.c:392
--Type <RET> for more, q to quit, c to continue without paging--
#21 0x0000561c8edfc225 in _start ()


if (err != CHIP_NO_ERROR)
{
ChipLogError(Ble, "Failed to schedule scan timeout.");
StopScan();
return err;
}
mTimerExpired = false;

return CHIP_NO_ERROR;
}

// Timer callback registered by StartScan() via SystemLayer().StartTimer(); it runs when the
// scan timeout elapses. `appState` is the ChipDeviceScanner pointer that StartScan() passed in.
void ChipDeviceScanner::TimerExpiredCallback(chip::System::Layer * layer, void * appState)
{
ChipDeviceScanner * chipDeviceScanner = static_cast<ChipDeviceScanner *>(appState);
// Record that the timer has already fired so the destructor skips its lock + CancelTimer path.
chipDeviceScanner->MarkTimerExpired();
// Tell the delegate the scan ended because of the timeout, before the scan is stopped.
chipDeviceScanner->mDelegate->OnScanError(CHIP_ERROR_TIMEOUT);
// StopScan() finishes by calling the delegate's OnScanComplete(), which is allowed to delete
// the scanner, so chipDeviceScanner must not be used after this call.
chipDeviceScanner->StopScan();
}
Expand Down Expand Up @@ -180,6 +192,11 @@ CHIP_ERROR ChipDeviceScanner::StopScan()
return CHIP_ERROR_INTERNAL;
}

ChipDeviceScannerDelegate * delegate = this->mDelegate;
// callback is explicitly allowed to delete the scanner (hence no more
// references to 'self' here)
delegate->OnScanComplete();

return CHIP_NO_ERROR;
}

Expand All @@ -192,12 +209,7 @@ CHIP_ERROR ChipDeviceScanner::MainLoopStopScan(ChipDeviceScanner * self)
ChipLogError(Ble, "Failed to stop discovery %s", error->message);
g_error_free(error);
}
ChipDeviceScannerDelegate * delegate = self->mDelegate;
self->mIsScanning = false;

// callback is explicitly allowed to delete the scanner (hence no more
// references to 'self' here)
delegate->OnScanComplete();
self->mIsScanning = false;

return CHIP_NO_ERROR;
}
Expand Down
5 changes: 5 additions & 0 deletions src/platform/Linux/bluez/ChipDeviceScanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ class ChipDeviceScanner
/// Stop any currently running scan
CHIP_ERROR StopScan();

/// Should only be called by TimerExpiredCallback.
void MarkTimerExpired() { mTimerExpired = true; }

/// Create a new device scanner
///
/// Convenience method to allocate any required variables.
Expand Down Expand Up @@ -101,6 +104,8 @@ class ChipDeviceScanner
gulong mInterfaceChangedSignal = 0;
bool mIsScanning = false;
bool mIsStopping = false;
/// Used to track if timer has already expired and doesn't need to be canceled.
bool mTimerExpired = false;
};

} // namespace Internal
Expand Down

1 comment on commit da37e59

@tianfeng-yang
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tehampson This patch made my application deadlock; I have added comments in the code.

Please sign in to comment.