From 69df8bb9a706e47e552d6b6b058c6db2f4bdd28e Mon Sep 17 00:00:00 2001 From: Chris Coleman Date: Sun, 26 Nov 2023 18:27:29 -0500 Subject: [PATCH 1/4] Humble beginnings for thingy91 FOTA ### Summary Get's a FOTA MVP up and running so we can get the latest and greatest on our nRF91 Thingy:91s Change Highlights -- more details inline in the code: 1. I was going insane trying to debug MCUboot when no logs were being emitted. Tinkered around with Kconfig options to get this working. Trade off is I disabled support for mcumgr serial DFU but I do not believe anyone is using this atm anyway. 2. Fixing 1, pointed out that due to the size of the thingy91 image we are hitting an edge case with MCUboot's "swap move" algorithm blocking a FOTA from completing succesfully. I switched to OVERWRITE strategy to work around this. 3. First pass at a FOTA check. The algo works as follows: - Check for FOTA upon first connection since boot - Check for FOTA every 12 hours there-after - If FOTA check on 12 hour mark fails, check upon next successful connection (to accomodate situation where we were not connected) 4. There's some kind of corruption taking place when sending data to cloud _and_ doing a FOTA. We should figure out what's going wrong but for now I fixed it by adding retry logic in our own custom FOTA handler (`CONFIG_MEMFAULT_FOTA_DOWNLOAD_CALLBACK_CUSTOM=y`) ### Internal Documentation https://www.notion.so/memfault/WeFault-Thingy91-Fleet-Quickstart-394ab5bd97ab4a49af86f40423f51982 ### Test Plan 1. Build the new image ``` west build -b thingy91_nrf9160_ns@1.0.0 memfault-asset-tracker -- -DCONFIG_MEMFAULT_NCS_PROJECT_KEY=\"${PROJECT_KEY}\" -DOVERLAY_CONFIG=overlay-memfault.conf -DCONFIG_BUILD_OUTPUT_META=n -Dmcuboot_CONFIG_BUILD_OUTPUT_META=n -DCONFIG_MEMFAULT_FOTA=y -DCONFIG_MEMFAULT_NCS_FW_VERSION_PREFIX=\"0.0.2+\" ``` 2. Flash the image on the board ``` west flash --erase ``` 3. Re-run 1) but with `-DCONFIG_MEMFAULT_NCS_FW_VERSION_PREFIX=\"0.0.3+\" 4. Upload `app_update.bin` from `build/zephyr` folder into wefault project: https://app.memfault.com/organizations/memfault/projects/thingy91-fleet/releases/0.0.3%2Bd8aaaa 5. Confirm FOTA completes successfully by watching console: ``` [00:00:26.520,416] mflt: FOTA Update Available. Starting Download! [00:00:26.521,820] download_client: Downloading: https://ota-cdn.memfault.com/3993/6539/11764076539?token=rFQ0Owi7Cbbf7VP9AOQimNndKD16VO_DdjLDl9ANrYg&expires=1701064800 [0] [00:00:26.521,881] mflt: FOTA In Progress [00:00:26.714,202] download_client: Setting up TLS credentials, sec tag count 3 [00:00:26.714,324] download_client: Connecting to https://ota-cdn.memfault.com/3993/6539/11764076539?token=rFQ0Owi7Cbbf7VP9AOQimNndKD16VO_DdjLDl9ANrYg&expires=1701064800 [00:00:28.714,447] download_client: Downloaded 1024/424428 bytes (0%) [...] [00:07:33.470,214] download_client: Downloaded 316416/424428 bytes (74%) [00:07:35.357,604] app_event_manager: LOCATION_MODULE_EVT_CLOUD_LOCATION_DATA_READY [00:07:35.358,337] app_event_manager: DATA_EVT_DATA_READY [00:07:35.364,715] app_event_manager: DATA_EVT_CLOUD_LOCATION_DATA_SEND [00:07:35.382,873] app_event_manager: DATA_EVT_DATA_SEND_BATCH [00:07:35.383,758] app_event_manager: CLOUD_EVT_DATA_SEND_QOS [00:07:35.384,094] app_event_manager: CLOUD_EVT_CLOUD_LOCATION_UNKNOWN [00:07:35.384,857] app_event_manager: LOCATION_MODULE_EVT_INACTIVE [00:07:35.386,108] app_event_manager: CLOUD_EVT_DATA_SEND_QOS [00:07:40.273,132] download_client: Downloaded 317440/424428 bytes (74%) [00:07:41.878,082] download_client: Unexpected HTTP response: 400 bad request [00:07:41.878,112] fota_download: Download client error [00:07:41.878,143] dfu_target_mcuboot: MCUBoot image upgrade aborted. [00:07:41.878,173] dfu_target_mcuboot: MCUBoot image upgrade aborted. [00:07:41.879,608] mflt: FOTA failed -- trying again ... [00:07:50.251,220] mflt: FOTA Update Available. Starting Download! [...] [00:10:32.878,387] download_client: Downloaded 421888/424428 bytes (99%) [00:10:33.554,199] download_client: Downloaded 422912/424428 bytes (99%) uart:~$ *** Booting nRF Connect SDK dcaa0fbee306 *** I: Starting bootloader I: Image index: 0, Swap type: test I: Image 0 upgrade secondary slot -> primary slot I: Erasing the primary slot I: Image 0 copying the secondary slot to the primary slot: 0x679f0 bytes D: writing magic; fa_id=2 off=0x68ff0 (0x74ff0) D: erasing secondary header D: erasing secondary trailer I: Bootloader chainload address offset: 0xc000 I: Jumping to the first image slot ``` --- child_image/mcuboot.conf | 26 +++++++++++++ prj.conf | 4 ++ src/main.c | 79 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+) diff --git a/child_image/mcuboot.conf b/child_image/mcuboot.conf index c87b641..730016c 100644 --- a/child_image/mcuboot.conf +++ b/child_image/mcuboot.conf @@ -7,3 +7,29 @@ # Increase the monotonic firmware version before building a firmware update CONFIG_FW_INFO=y CONFIG_FW_INFO_FIRMWARE_VERSION=1 + +# Note: BOOT_SERIAL and UART_CONSOLE cannot be installed on the same uart +# It looks like there is an unused UART_1 on the thingy where we could +# potentially wire up CONFIG_MCUBOOT_SERIAL=y CONFIG_BOOT_SERIAL_UART=y +# in the future + +CONFIG_MCUBOOT_SERIAL=n +CONFIG_CONSOLE=y +CONFIG_UART_CONSOLE=y +CONFIG_MCUBOOT_LOG_LEVEL_DBG=y + +# Note: The image is nearly full causing some kind of edge case to be hit when swap upgrade +# move strategy is used: +# +# I: Boot source: none +# I: Image index: 0, Swap type: test +# I: Starting swap using move algorithm. +# W: Not enough free space to run swap upgrade +# W: required 430080 bytes but only 425984 are available +# +# For more info about mcuboot upgrade strategies see: +# https://interrupt.memfault.com/blog/mcuboot-overview#swap-mode +# +# For now let's flip on MCUBOOT_OVERWRITE_ONLY mode which has less wonky +# flash requirements (and imo is better for long term flash health) +CONFIG_BOOT_UPGRADE_ONLY=y diff --git a/prj.conf b/prj.conf index 2ef2956..ed23524 100644 --- a/prj.conf +++ b/prj.conf @@ -126,3 +126,7 @@ CONFIG_LOG_DEFAULT_LEVEL=3 # instead of logs being separately viewed in a file via # RTT logger CONFIG_SHELL_LOG_BACKEND=y + +# Let's manage the memfault_fota_download_callback() from +# the asset tracker application rather than using the Memfault default +CONFIG_MEMFAULT_FOTA_DOWNLOAD_CALLBACK_CUSTOM=y \ No newline at end of file diff --git a/src/main.c b/src/main.c index 232b189..86aa3c2 100644 --- a/src/main.c +++ b/src/main.c @@ -25,6 +25,9 @@ #include "events/util_module_event.h" #include "events/modem_module_event.h" +#include "memfault/components.h" +#include "memfault/nrfconnect_port/fota.h" + #include #include @@ -490,6 +493,74 @@ static void on_sub_state_active(struct app_msg_data *msg) } } +/* always check for a new FOTA on first boot and then every timer period */ +static bool s_run_fota_check_upon_lte_connect = true; + +static void prv_memfault_fota_work_handler(struct k_work *work) { + int rv = memfault_fota_start(); + if (rv < 0) { + /* we may have just not been connected, try again on next connect */ + s_run_fota_check_upon_lte_connect = true; + } else if (rv == 0) { + MEMFAULT_LOG_INFO("FOTA up to date"); + } +} +K_WORK_DELAYABLE_DEFINE(s_memfault_fota_work, prv_memfault_fota_work_handler); + +static void prv_run_memfault_fota_check(void) { + k_work_schedule(&s_memfault_fota_work, K_SECONDS(5)); +} + +void memfault_fota_download_callback(const struct fota_download_evt *evt) { + switch (evt->id) { + case FOTA_DOWNLOAD_EVT_FINISHED: + MEMFAULT_LOG_INFO("OTA Complete, resetting to install update!"); + memfault_platform_reboot(); + break; + case FOTA_DOWNLOAD_EVT_ERROR: + /* + * FIXME: When multiple threads are send/read'ing from the modem + * at the same time, we get intermittent corruption issues. We should + * really figure out what is wrong here but for now we just keep retrying. + * + * Example error logs: + * + * [00:04:45.544,891] location: GNSS timed out possibly due to too short GNSS time windows + * [00:04:45.557,037] app_event_manager: LOCATION_MODULE_EVT_CLOUD_LOCATION_DATA_READY + * [00:04:45.557,769] app_event_manager: DATA_EVT_DATA_READY + * [00:04:45.562,713] app_event_manager: DATA_EVT_CLOUD_LOCATION_DATA_SEND + * [00:04:45.580,718] app_event_manager: DATA_EVT_DATA_SEND_BATCH + * [00:04:45.581,604] app_event_manager: CLOUD_EVT_DATA_SEND_QOS + * [00:04:45.581,909] app_event_manager: CLOUD_EVT_CLOUD_LOCATION_UNKNOWN + * [00:04:45.582,672] app_event_manager: LOCATION_MODULE_EVT_INACTIVE + * [00:04:45.583,923] app_event_manager: CLOUD_EVT_DATA_SEND_QOS + * [00:04:46.846,618] download_client: Downloaded 346112/424124 bytes (81%) + * [00:04:47.373,565] download_client: Unexpected HTTP response: 403 forbidden + * [00:04:47.373,596] fota_download: Download client error + * [00:04:47.373,596] fota_download: Download client error + * [00:04:47.373,596] dfu_target_mcuboot: MCUBoot image upgrade aborted. + * [00:04:47.373,657] dfu_target_mcuboot: MCUBoot image upgrade aborted. + * [00:04:47.375,183] FOTA failed -- trying again ... + */ + MEMFAULT_LOG_ERROR("FOTA failed -- trying again ..."); + prv_run_memfault_fota_check(); + break; + default: + break; + } +} + +static void prv_memfault_fota_timer_expiry_handler(struct k_timer *dummy) { + prv_run_memfault_fota_check(); +} + +K_TIMER_DEFINE(s_memfault_fota_timer, prv_memfault_fota_timer_expiry_handler, NULL); + +static void prv_memfault_fota_timer_start(void) { + /* Check and see if new FOTA is available every 12 hours */ + k_timer_start(&s_memfault_fota_timer, K_HOURS(12), K_HOURS(12)); +} + /* Message handler for all states. */ static void on_all_events(struct app_msg_data *msg) { @@ -517,6 +588,13 @@ static void on_all_events(struct app_msg_data *msg) if (IS_EVENT(msg, sensor, SENSOR_EVT_MOVEMENT_IMPACT_DETECTED)) { SEND_EVENT(app, APP_EVT_DATA_GET_ALL); } + + if (IS_EVENT(msg, modem, MODEM_EVT_LTE_CONNECTED)) { + if (s_run_fota_check_upon_lte_connect) { + prv_run_memfault_fota_check(); + s_run_fota_check_upon_lte_connect = false; + } + } } int main(void) @@ -547,6 +625,7 @@ int main(void) LOG_ERR("Failed starting module, error: %d", err); SEND_ERROR(app, APP_EVT_ERROR, err); } + prv_memfault_fota_timer_start(); while (true) { module_get_next_msg(&self, &msg); From 42b16b35665de382ab1947422c172f4aedfd078d Mon Sep 17 00:00:00 2001 From: Gillian Minnehan Date: Wed, 6 Dec 2023 16:24:22 -0500 Subject: [PATCH 2/4] Clean up FOTA update code --- src/main.c | 93 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 59 insertions(+), 34 deletions(-) diff --git a/src/main.c b/src/main.c index 86aa3c2..bfd06db 100644 --- a/src/main.c +++ b/src/main.c @@ -89,6 +89,10 @@ static bool activity; */ static void data_sample_timer_handler(struct k_timer *timer); +#if defined(CONFIG_MEMFAULT_FOTA) +static void prv_memfault_fota_timer_expiry_handler(struct k_timer *timer); +#endif + /* Application module message queue. */ #define APP_QUEUE_ENTRY_COUNT 10 #define APP_QUEUE_BYTE_ALIGNMENT 4 @@ -96,6 +100,11 @@ static void data_sample_timer_handler(struct k_timer *timer); /* Data fetching timeouts */ #define DATA_FETCH_TIMEOUT_DEFAULT 2 +#if defined(CONFIG_MEMFAULT_FOTA) +/* FOTA timeouts */ +#define FOTA_CHECK_TIMEOUT_HOURS 12 +#endif + K_MSGQ_DEFINE(msgq_app, sizeof(struct app_msg_data), APP_QUEUE_ENTRY_COUNT, APP_QUEUE_BYTE_ALIGNMENT); @@ -112,6 +121,11 @@ K_TIMER_DEFINE(movement_timeout_timer, data_sample_timer_handler, NULL); */ K_TIMER_DEFINE(movement_resolution_timer, NULL, NULL); +#if defined(CONFIG_MEMFAULT_FOTA) +/* FOTA timer used to trigger an FOTA update if a payload is available. */ +K_TIMER_DEFINE(s_memfault_fota_timer, prv_memfault_fota_timer_expiry_handler, NULL); +#endif + /* Module data structure to hold information of the application module, which * opens up for using convenience functions available for modules. */ @@ -493,10 +507,12 @@ static void on_sub_state_active(struct app_msg_data *msg) } } -/* always check for a new FOTA on first boot and then every timer period */ +#if defined(CONFIG_MEMFAULT_FOTA) +/* Always check for a new FOTA on first boot and then every timer period */ static bool s_run_fota_check_upon_lte_connect = true; -static void prv_memfault_fota_work_handler(struct k_work *work) { +static void prv_memfault_fota_work_handler(struct k_work *work) +{ int rv = memfault_fota_start(); if (rv < 0) { /* we may have just not been connected, try again on next connect */ @@ -511,55 +527,59 @@ static void prv_run_memfault_fota_check(void) { k_work_schedule(&s_memfault_fota_work, K_SECONDS(5)); } -void memfault_fota_download_callback(const struct fota_download_evt *evt) { +void memfault_fota_download_callback(const struct fota_download_evt *evt) +{ switch (evt->id) { case FOTA_DOWNLOAD_EVT_FINISHED: MEMFAULT_LOG_INFO("OTA Complete, resetting to install update!"); memfault_platform_reboot(); break; case FOTA_DOWNLOAD_EVT_ERROR: - /* - * FIXME: When multiple threads are send/read'ing from the modem - * at the same time, we get intermittent corruption issues. We should - * really figure out what is wrong here but for now we just keep retrying. - * - * Example error logs: - * - * [00:04:45.544,891] location: GNSS timed out possibly due to too short GNSS time windows - * [00:04:45.557,037] app_event_manager: LOCATION_MODULE_EVT_CLOUD_LOCATION_DATA_READY - * [00:04:45.557,769] app_event_manager: DATA_EVT_DATA_READY - * [00:04:45.562,713] app_event_manager: DATA_EVT_CLOUD_LOCATION_DATA_SEND - * [00:04:45.580,718] app_event_manager: DATA_EVT_DATA_SEND_BATCH - * [00:04:45.581,604] app_event_manager: CLOUD_EVT_DATA_SEND_QOS - * [00:04:45.581,909] app_event_manager: CLOUD_EVT_CLOUD_LOCATION_UNKNOWN - * [00:04:45.582,672] app_event_manager: LOCATION_MODULE_EVT_INACTIVE - * [00:04:45.583,923] app_event_manager: CLOUD_EVT_DATA_SEND_QOS - * [00:04:46.846,618] download_client: Downloaded 346112/424124 bytes (81%) - * [00:04:47.373,565] download_client: Unexpected HTTP response: 403 forbidden - * [00:04:47.373,596] fota_download: Download client error - * [00:04:47.373,596] fota_download: Download client error - * [00:04:47.373,596] dfu_target_mcuboot: MCUBoot image upgrade aborted. - * [00:04:47.373,657] dfu_target_mcuboot: MCUBoot image upgrade aborted. - * [00:04:47.375,183] FOTA failed -- trying again ... - */ + /* + * FIXME: When multiple threads are send/read'ing from the modem + * at the same time, we get intermittent corruption issues. We should + * really figure out what is wrong here but for now we just keep retrying. + * + * Example error logs: + * + * location: GNSS timed out possibly due to too short GNSS time windows + * app_event_manager: LOCATION_MODULE_EVT_CLOUD_LOCATION_DATA_READY + * app_event_manager: DATA_EVT_DATA_READY + * app_event_manager: DATA_EVT_CLOUD_LOCATION_DATA_SEND + * app_event_manager: DATA_EVT_DATA_SEND_BATCH + * app_event_manager: CLOUD_EVT_DATA_SEND_QOS + * app_event_manager: CLOUD_EVT_CLOUD_LOCATION_UNKNOWN + * app_event_manager: LOCATION_MODULE_EVT_INACTIVE + * app_event_manager: CLOUD_EVT_DATA_SEND_QOS + * download_client: Downloaded 346112/424124 bytes (81%) + * download_client: Unexpected HTTP response: 403 forbidden + * fota_download: Download client error + * fota_download: Download client error + * dfu_target_mcuboot: MCUBoot image upgrade aborted. + * dfu_target_mcuboot: MCUBoot image upgrade aborted. + * FOTA failed -- trying again ... + */ MEMFAULT_LOG_ERROR("FOTA failed -- trying again ..."); prv_run_memfault_fota_check(); break; default: break; - } + } } -static void prv_memfault_fota_timer_expiry_handler(struct k_timer *dummy) { +static void prv_memfault_fota_timer_expiry_handler(struct k_timer *timer) +{ + ARG_UNUSED(timer); prv_run_memfault_fota_check(); } -K_TIMER_DEFINE(s_memfault_fota_timer, prv_memfault_fota_timer_expiry_handler, NULL); - -static void prv_memfault_fota_timer_start(void) { - /* Check and see if new FOTA is available every 12 hours */ - k_timer_start(&s_memfault_fota_timer, K_HOURS(12), K_HOURS(12)); +static void prv_memfault_fota_timer_start(void) +{ + /* Periodically check if new FOTA is available */ + k_timer_start(&s_memfault_fota_timer, K_HOURS(FOTA_CHECK_TIMEOUT_HOURS), + K_HOURS(FOTA_CHECK_TIMEOUT_HOURS)); } +#endif /* CONFIG_MEMFAULT_FOTA */ /* Message handler for all states. */ static void on_all_events(struct app_msg_data *msg) @@ -589,12 +609,14 @@ static void on_all_events(struct app_msg_data *msg) SEND_EVENT(app, APP_EVT_DATA_GET_ALL); } +#if defined(CONFIG_MEMFAULT_FOTA) if (IS_EVENT(msg, modem, MODEM_EVT_LTE_CONNECTED)) { if (s_run_fota_check_upon_lte_connect) { prv_run_memfault_fota_check(); s_run_fota_check_upon_lte_connect = false; } } +#endif /* CONFIG_MEMFAULT_FOTA */ } int main(void) @@ -625,7 +647,10 @@ int main(void) LOG_ERR("Failed starting module, error: %d", err); SEND_ERROR(app, APP_EVT_ERROR, err); } + +#if defined(CONFIG_MEMFAULT_FOTA) prv_memfault_fota_timer_start(); +#endif while (true) { module_get_next_msg(&self, &msg); From 6f2fd1d108359a3b7b7498c5f1071f9872d8ecc7 Mon Sep 17 00:00:00 2001 From: Gillian Minnehan Date: Thu, 7 Dec 2023 10:55:43 -0500 Subject: [PATCH 3/4] Consolidate edits for readability --- src/main.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/src/main.c b/src/main.c index bfd06db..853b762 100644 --- a/src/main.c +++ b/src/main.c @@ -89,10 +89,6 @@ static bool activity; */ static void data_sample_timer_handler(struct k_timer *timer); -#if defined(CONFIG_MEMFAULT_FOTA) -static void prv_memfault_fota_timer_expiry_handler(struct k_timer *timer); -#endif - /* Application module message queue. */ #define APP_QUEUE_ENTRY_COUNT 10 #define APP_QUEUE_BYTE_ALIGNMENT 4 @@ -100,11 +96,6 @@ static void prv_memfault_fota_timer_expiry_handler(struct k_timer *timer); /* Data fetching timeouts */ #define DATA_FETCH_TIMEOUT_DEFAULT 2 -#if defined(CONFIG_MEMFAULT_FOTA) -/* FOTA timeouts */ -#define FOTA_CHECK_TIMEOUT_HOURS 12 -#endif - K_MSGQ_DEFINE(msgq_app, sizeof(struct app_msg_data), APP_QUEUE_ENTRY_COUNT, APP_QUEUE_BYTE_ALIGNMENT); @@ -121,11 +112,6 @@ K_TIMER_DEFINE(movement_timeout_timer, data_sample_timer_handler, NULL); */ K_TIMER_DEFINE(movement_resolution_timer, NULL, NULL); -#if defined(CONFIG_MEMFAULT_FOTA) -/* FOTA timer used to trigger an FOTA update if a payload is available. */ -K_TIMER_DEFINE(s_memfault_fota_timer, prv_memfault_fota_timer_expiry_handler, NULL); -#endif - /* Module data structure to hold information of the application module, which * opens up for using convenience functions available for modules. */ @@ -508,6 +494,10 @@ static void on_sub_state_active(struct app_msg_data *msg) } #if defined(CONFIG_MEMFAULT_FOTA) +/* FOTA timeout */ +#define FOTA_CHECK_TIMEOUT_HOURS 12 +#define FOTA_WORK_ITEM_DELAY_SECONDS 5 + /* Always check for a new FOTA on first boot and then every timer period */ static bool s_run_fota_check_upon_lte_connect = true; @@ -524,7 +514,7 @@ static void prv_memfault_fota_work_handler(struct k_work *work) K_WORK_DELAYABLE_DEFINE(s_memfault_fota_work, prv_memfault_fota_work_handler); static void prv_run_memfault_fota_check(void) { - k_work_schedule(&s_memfault_fota_work, K_SECONDS(5)); + k_work_schedule(&s_memfault_fota_work, K_SECONDS(FOTA_WORK_ITEM_DELAY_SECONDS)); } void memfault_fota_download_callback(const struct fota_download_evt *evt) @@ -572,6 +562,7 @@ static void prv_memfault_fota_timer_expiry_handler(struct k_timer *timer) ARG_UNUSED(timer); prv_run_memfault_fota_check(); } +K_TIMER_DEFINE(s_memfault_fota_timer, prv_memfault_fota_timer_expiry_handler, NULL); static void prv_memfault_fota_timer_start(void) { From 3c481ed3f04166b888959f0cad02cc835feec408 Mon Sep 17 00:00:00 2001 From: Gillian Minnehan Date: Thu, 7 Dec 2023 10:57:17 -0500 Subject: [PATCH 4/4] Add missing newline --- prj.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prj.conf b/prj.conf index ed23524..dc7e191 100644 --- a/prj.conf +++ b/prj.conf @@ -129,4 +129,4 @@ CONFIG_SHELL_LOG_BACKEND=y # Let's manage the memfault_fota_download_callback() from # the asset tracker application rather than using the Memfault default -CONFIG_MEMFAULT_FOTA_DOWNLOAD_CALLBACK_CUSTOM=y \ No newline at end of file +CONFIG_MEMFAULT_FOTA_DOWNLOAD_CALLBACK_CUSTOM=y