Skip to content

Commit

Permalink
Add full threading backtrace dumps in Cirque and REPL (#21615)
Browse files Browse the repository at this point in the history
* Add full threading backtrace dumps in Cirque and REPL tests on test
timeout failure.

* Add an in-python thread backtrace dump

* Put back timeout
  • Loading branch information
mrjerryjohns authored and pull[bot] committed Aug 5, 2023
1 parent 122804a commit f50f1bc
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 8 deletions.
12 changes: 9 additions & 3 deletions scripts/tests/run_python_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,19 @@ def main(app: str, factoryreset: bool, app_args: str, script: str, script_args:
'--log-format', '%(message)s'] + shlex.split(script_args)

if script_gdb:
script_command = "gdb -batch -return-child-result -q -ex run -ex bt --args python3".split() + script_command
#
# When running through Popen, we need to preserve some space-delimited args to GDB as a single logical argument. To do that, let's use '|' as a placeholder
# for the space character so that the initial split will not tokenize them, and then replace that with the space char thereafter.
#
script_command = "gdb -batch -return-child-result -q -ex run -ex thread|apply|all|bt --args python3".split() + script_command
else:
script_command = "/usr/bin/env python3".split() + script_command

logging.info(f"Execute: {script_command}")
final_script_command = [i.replace('|', ' ') for i in script_command]

logging.info(f"Execute: {final_script_command}")
test_script_process = subprocess.Popen(
script_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
final_script_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
DumpProgramOutputToQueue(log_cooking_threads, Fore.GREEN + "TEST" + Style.RESET_ALL,
test_script_process, log_queue)

Expand Down
11 changes: 11 additions & 0 deletions src/controller/python/chip/native/CommonStackInit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,17 @@ struct __attribute__((packed)) PyCommonStackInitParams
uint32_t mBluetoothAdapterId;
};

/**
 * Function to artificially cause a crash to happen.
 *
 * It can be used in place of os._exit() in Python so that, when the process
 * is run through GDB, you'll get a backtrace of what happened.
 */
void pychip_CauseCrash()
{
    // The pointer is volatile-qualified so the compiler has to emit the store.
    // Writing through a plain null pointer is undefined behaviour, which allows
    // an optimizer to drop the write and turn this function into a no-op.
    volatile uint8_t * ptr = nullptr;
    *ptr = 0;
}

ChipError::StorageType pychip_CommonStackInit(const PyCommonStackInitParams * aParams)
{
ReturnErrorOnFailure(Platform::MemoryInit().AsInteger());
Expand Down
25 changes: 22 additions & 3 deletions src/controller/python/test/test_scripts/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import chip.FabricAdmin
import copy
import secrets
import faulthandler

logger = logging.getLogger('PythonMatterControllerTEST')
logger.setLevel(logging.INFO)
Expand All @@ -50,9 +51,27 @@
logger.addHandler(sh)


def TestFail(message):
def TestFail(message, doCrash=False):
logger.fatal("Testfail: {}".format(message))
os._exit(1)

if (doCrash):
logger.fatal("--------------------------------")
logger.fatal("Backtrace of all Python threads:")
logger.fatal("--------------------------------")

#
# Let's dump the Python backtrace for all threads, since the backtrace we'll
# get from gdb (if one is attached) won't give us good Python symbol information.
#
faulthandler.dump_traceback()

#
# Cause a crash to happen so that we can actually get a meaningful
# backtrace when run through GDB.
#
chip.native.GetLibraryHandle().pychip_CauseCrash()
else:
os._exit(1)


def FailIfNot(cond, message):
Expand Down Expand Up @@ -143,7 +162,7 @@ def run(self):
self._cv.wait(wait_time)
wait_time = stop_time - time.time()
if time.time() > stop_time:
TestFail("Timeout")
TestFail("Timeout", doCrash=True)


class TestResult:
Expand Down
4 changes: 2 additions & 2 deletions src/test_driver/linux-cirque/MobileDeviceTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def run_controller_test(self):
if device['type'] == 'MobileDevice']

for server in server_ids:
self.execute_device_cmd(server, "CHIPCirqueDaemon.py -- run gdb -return-child-result -q -ex \"set pagination off\" -ex run -ex \"bt 25\" --args {} --thread --discriminator {}".format(
self.execute_device_cmd(server, "CHIPCirqueDaemon.py -- run gdb -batch -return-child-result -q -ex \"set pagination off\" -ex run -ex \"thread apply all bt\" --args {} --thread --discriminator {}".format(
os.path.join(CHIP_REPO, "out/debug/standalone/chip-all-clusters-app"), TEST_DISCRIMINATOR))

self.reset_thread_devices(server_ids)
Expand All @@ -97,7 +97,7 @@ def run_controller_test(self):
self.execute_device_cmd(req_device_id, "pip3 install {}".format(os.path.join(
CHIP_REPO, "out/debug/linux_x64_gcc/controller/python/chip_repl-0.0-py3-none-any.whl")))

command = "gdb -return-child-result -q -ex run -ex bt --args python3 {} -t 240 -a {} --paa-trust-store-path {}".format(
command = "gdb -batch -return-child-result -q -ex run -ex \"thread apply all bt\" --args python3 {} -t 240 -a {} --paa-trust-store-path {}".format(
os.path.join(
CHIP_REPO, "src/controller/python/test/test_scripts/mobile-device-test.py"), ethernet_ip,
os.path.join(CHIP_REPO, MATTER_DEVELOPMENT_PAA_ROOT_CERTS))
Expand Down

0 comments on commit f50f1bc

Please sign in to comment.