Skip to content

Commit

Permalink
Merge branch 'save_twdt_to_coredump' into 'master'
Browse files Browse the repository at this point in the history
feat(coredump): save twdt panic output to coredump elf file

Closes IDF-908

See merge request espressif/esp-idf!27024
  • Loading branch information
erhankur committed Dec 7, 2023
2 parents 840b2d5 + 58ee206 commit 8923152
Show file tree
Hide file tree
Showing 5 changed files with 257 additions and 96 deletions.
27 changes: 26 additions & 1 deletion components/esp_system/include/esp_task_wdt.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2015-2022 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2015-2023 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
Expand Down Expand Up @@ -179,6 +179,31 @@ esp_err_t esp_task_wdt_status(TaskHandle_t task_handle);
*/
void __attribute__((weak)) esp_task_wdt_isr_user_handler(void);

/**
 * @brief Callback type receiving the text produced by esp_task_wdt_print_triggered_tasks()
 *
 * @param[in] opaque The `opaque` pointer that was passed to esp_task_wdt_print_triggered_tasks(), forwarded unchanged.
 * @param[in] msg    A fragment of the report. The report is delivered in several fragments, not one call per line.
 */
typedef void (*task_wdt_msg_handler)(void *opaque, const char *msg);

/**
 * @brief Prints or retrieves information about tasks/users that triggered the Task Watchdog Timeout.
 *
 * This function walks the entries subscribed to the Task Watchdog and reports every task/user that did not
 * reset the watchdog in time: its name and the CPU(s) it is associated with.
 * The report is either logged to the console or handed to a caller supplied message handler, which allows the
 * caller to capture the text (or, for example, to measure its total length).
 * Optionally, the CPU affinity of the failing tasks/users is accumulated into a caller supplied bitmap.
 *
 * @param[in]     msg_handler Optional message handler function. When provided, it is called first with the caption
 *                            and then, for each failing task/user, with three fragments: the separator "\n - ",
 *                            the task/user name and the CPU suffix (e.g. " (CPU 0)").
 * @param[in]     opaque      Optional pointer to opaque data that will be passed to the message handler function.
 * @param[in,out] cpus_fail   Optional pointer to a bitmap of failing CPUs (bit 0 represents core 0, bit 1 core 1).
 *                            The affinity of each failing task/user is OR-ed into the existing value, so the caller
 *                            must initialize it (typically to 0) before the call.
 *
 * @return
 *      - ESP_OK: The list of subscribed tasks/users was not empty and the report was emitted.
 *      - ESP_FAIL: No tasks/users are subscribed, thus no information was printed or retrieved.
 *
 * @note
 *      - If `msg_handler` is not provided, the information will be printed to console using ESP_EARLY_LOGE.
 *      - If `msg_handler` is provided, nothing is logged to the console; all text goes to the message handler.
 *      - User entries (which have no task handle) are reported as affecting all cores.
 *      - During the execution of this function, logging is allowed in critical sections, as TWDT timeouts are considered fatal errors.
 */
esp_err_t esp_task_wdt_print_triggered_tasks(task_wdt_msg_handler msg_handler, void *opaque, int *cpus_fail);

#ifdef __cplusplus
}
#endif
131 changes: 77 additions & 54 deletions components/esp_system/task_wdt/task_wdt.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*/

#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <stdio.h>
#include <sys/queue.h>
Expand Down Expand Up @@ -337,6 +338,32 @@ static void subscribe_idle(uint32_t core_mask)
*
*/

/**
 * Compute the bitmap of cores a TWDT entry is associated with.
 *
 * Bit 0 represents core 0, bit 1 represents core 1.
 *
 * @param xTask Task handle of the entry, or NULL for a user entry.
 * @return Core bitmap for the entry. A user entry (NULL handle) is reported
 *         on every core, since there is no way to tell where it runs.
 */
static UBaseType_t get_task_affinity(const TaskHandle_t xTask)
{
    if (xTask != NULL) {
#if CONFIG_FREERTOS_SMP
#if configNUM_CORES > 1
        /* SMP kernel, multi-core: the kernel already provides an affinity mask */
        return vTaskCoreAffinityGet(xTask);
#else
        /* SMP kernel, single core: only core 0 exists */
        return BIT(0);
#endif
#else
        /* Non-SMP kernel: translate the core ID into a bitmap */
        const BaseType_t core_id = xTaskGetCoreID(xTask);
        switch (core_id) {
        case 0:
        case 1:
            return BIT(core_id);
        default:
            /* Any other value is treated as "may run on either core" */
            return BIT(1) | BIT(0);
        }
#endif
    }

    /* User entry: mark all cores as failing */
#if configNUM_CORES > 1
    return BIT(1) | BIT(0);
#else
    return BIT(0);
#endif
}

/**
* Function simulating an abort coming from the interrupted task of the current
Expand Down Expand Up @@ -456,65 +483,17 @@ static void task_wdt_isr(void *arg)
portENTER_CRITICAL_ISR(&spinlock);
esp_task_wdt_impl_timeout_triggered(p_twdt_obj->impl_ctx);

// If there are no entries, there's nothing to do.
if (SLIST_EMPTY(&p_twdt_obj->entries_slist)) {
portEXIT_CRITICAL_ISR(&spinlock);
return;
}
// Find what entries triggered the TWDT timeout (i.e., which entries have not been reset)
/*
Note: We are currently in a critical section, thus under normal circumstances, logging should not be allowed.
However, TWDT timeouts count as fatal errors, thus reporting the fatal error is considered more important than
minimizing interrupt latency. Thus we allow logging in critical sections in this narrow case.
*/
ESP_EARLY_LOGE(TAG, "Task watchdog got triggered. The following tasks/users did not reset the watchdog in time:");
twdt_entry_t *entry;
/* Keep a bitmap of CPU cores having tasks that have not reset TWDT.
* Bit 0 represents core 0, bit 1 represents core 1, and so on. */
int cpus_fail = 0;
bool panic = p_twdt_obj->panic;

SLIST_FOREACH(entry, &p_twdt_obj->entries_slist, slist_entry) {
if (!entry->has_reset) {
if (entry->task_handle) {
#if CONFIG_FREERTOS_SMP
#if configNUM_CORES > 1
// Log the task's name and its affinity
const UBaseType_t affinity = vTaskCoreAffinityGet(entry->task_handle);
ESP_EARLY_LOGE(TAG, " - %s (0x%x)", pcTaskGetName(entry->task_handle), affinity);
cpus_fail |= affinity;
#else // configNUM_CORES > 1
// Log the task's name
ESP_EARLY_LOGE(TAG, " - %s", pcTaskGetName(entry->task_handle));
cpus_fail |= BIT(0);
#endif // configNUM_CORES > 1
#else // CONFIG_FREERTOS_SMP
BaseType_t task_affinity = xTaskGetCoreID(entry->task_handle);
const char *cpu;
if (task_affinity == 0) {
cpu = DRAM_STR("CPU 0");
cpus_fail |= BIT(0);
} else if (task_affinity == 1) {
cpu = DRAM_STR("CPU 1");
cpus_fail |= BIT(1);
} else {
cpu = DRAM_STR("CPU 0/1");
cpus_fail |= BIT(1) | BIT(0);
}
ESP_EARLY_LOGE(TAG, " - %s (%s)", pcTaskGetName(entry->task_handle), cpu);
#endif // CONFIG_FREERTOS_SMP
} else {
/* User entry, we cannot predict on which core it is scheduled to run,
* so let's mark all cores as failing */
#if configNUM_CORES > 1
cpus_fail = BIT(1) | BIT(0);
#else // configNUM_CORES > 1
cpus_fail = BIT(0);
#endif // configNUM_CORES > 1
ESP_EARLY_LOGE(TAG, " - %s", entry->user_name);
}
}
}
if (esp_task_wdt_print_triggered_tasks(NULL, NULL, &cpus_fail) != ESP_OK) {
// If there are no entries, there's nothing to do.
portEXIT_CRITICAL_ISR(&spinlock);
return;
}

ESP_EARLY_LOGE(TAG, "%s", DRAM_STR("Tasks currently running:"));
for (int x = 0; x < portNUM_PROCESSORS; x++) {
ESP_EARLY_LOGE(TAG, "CPU %d: %s", x, pcTaskGetName(xTaskGetCurrentTaskHandleForCore(x)));
Expand Down Expand Up @@ -806,3 +785,47 @@ esp_err_t esp_task_wdt_status(TaskHandle_t task_handle)

return ret;
}

/**
 * Report every TWDT entry that has not reset the watchdog.
 *
 * The report consists of a caption followed by one item per failing entry
 * (name plus CPU suffix). It is logged with ESP_EARLY_LOGE when no message
 * handler is given; otherwise it is passed to the handler as a sequence of
 * string fragments (caption, then "\n - ", name and CPU suffix per entry).
 *
 * @param msg_handler Optional sink for the report text; NULL logs to the console.
 * @param opaque      Passed unchanged to msg_handler on every call.
 * @param cpus_fail   Optional bitmap (bit 0 = core 0, bit 1 = core 1). The affinity
 *                    of each failing entry is OR-ed into the existing value, so the
 *                    caller must initialize it before the call.
 *
 * @return ESP_OK if the entry list was not empty and the report was emitted,
 *         ESP_FAIL if there is no TWDT object or no subscribed entry (nothing is
 *         printed and cpus_fail is left untouched).
 */
esp_err_t esp_task_wdt_print_triggered_tasks(task_wdt_msg_handler msg_handler, void *opaque, int *cpus_fail)
{
    /* This function is public, so it can be reached while the TWDT object does not
     * exist. Check the pointer before dereferencing it. No logging macro is used for
     * the check because this function runs in ISR/critical-section context. */
    if (p_twdt_obj == NULL || SLIST_EMPTY(&p_twdt_obj->entries_slist)) {
        return ESP_FAIL;
    }

    twdt_entry_t *entry;
    const char *caption = "Task watchdog got triggered. "
                          "The following tasks/users did not reset the watchdog in time:";

    if (msg_handler == NULL) {
        ESP_EARLY_LOGE(TAG, "%s", caption);
    } else {
        msg_handler(opaque, caption);
    }

    // Find what entries triggered the TWDT timeout (i.e., which entries have not been reset)
    SLIST_FOREACH(entry, &p_twdt_obj->entries_slist, slist_entry) {
        if (!entry->has_reset) {
            const char *cpu;
            // Task entries are identified by the task name, user entries by their user name
            const char *name = entry->task_handle ? pcTaskGetName(entry->task_handle) : entry->user_name;
            const UBaseType_t affinity = get_task_affinity(entry->task_handle);
            if (cpus_fail) {
                // Accumulate, so that the result covers every failing entry
                *cpus_fail |= affinity;
            }
            if (affinity == BIT(0)) {
                cpu = " (CPU 0)";
            } else if (affinity == BIT(1)) {
                cpu = " (CPU 1)";
            } else {
                cpu = " (CPU 0/1)";
            }
            if (msg_handler == NULL) {
                ESP_EARLY_LOGE(TAG, " - %s%s", name, cpu);
            } else {
                // The handler receives each item in three fragments, starting with the line separator
                msg_handler(opaque, "\n - ");
                msg_handler(opaque, name);
                msg_handler(opaque, cpu);
            }
        }
    }
    return ESP_OK;
}
Loading

0 comments on commit 8923152

Please sign in to comment.