From a8ed2bdcb36090a20f00c74afbd6d2a5c3a19469 Mon Sep 17 00:00:00 2001 From: Technobly Date: Wed, 14 Oct 2020 21:26:24 -0500 Subject: [PATCH] [gen2][electron] modem brownout or unexpected reset causes system to think it still is connected [ch64465] --- hal/src/electron/modem/mdm_hal.cpp | 163 +++++++++++++----- hal/src/electron/modem/mdm_hal.h | 9 + .../no_fixture_long_running/cellular.cpp | 50 +++++- 3 files changed, 179 insertions(+), 43 deletions(-) diff --git a/hal/src/electron/modem/mdm_hal.cpp b/hal/src/electron/modem/mdm_hal.cpp index 9b645b4a4b..68e6c989d5 100644 --- a/hal/src/electron/modem/mdm_hal.cpp +++ b/hal/src/electron/modem/mdm_hal.cpp @@ -53,6 +53,7 @@ std::recursive_mutex mdm_mutex; #define MAX_SIZE 1024 //!< max expected messages (used with RX) #define USO_MAX_WRITE 1024 //!< maximum number of bytes to write to socket (used with TX) #define MDM_URC_POLL_INTERVAL_MS (1000) //!< URC poll interval +#define MDM_C_REG_POLL_INTERVAL_MS (30000) //!< CREG/CGREG/CEREG poll interval to check for unexpected modem reset #define MDM_SOCKET_SEND_RETRIES_R4_BUG (3) // ID of the PDP context used to configure the default EPS bearer when registering in an LTE network @@ -74,28 +75,28 @@ std::recursive_mutex mdm_mutex; #define USOST_TIMEOUT ( 10 * 1000) /* 10s for R4, 1s for U2/G3, changed to 40s due to R410 firmware */ #define USOST_TIMEOUT_R4 ( 40 * 1000) /* L0.0.00.00.05.08,A.02.04 background DNS lookup and other factors */ #define USORF_TIMEOUT ( 10 * 1000) /* FIXME: 1s for R4/U2/G3, but longer timeouts required in deployments */ +#define CGATT_TIMEOUT (180 * 1000) +#define CEDRXS_TIMEOUT ( 10 * 1000) #define CEER_TIMEOUT ( 10 * 1000) -#define CEREG_TIMEOUT ( 60 * 1000) +#define CFUN_TIMEOUT (180 * 1000) #define CGDCONT_TIMEOUT ( 10 * 1000) -#define CGREG_TIMEOUT ( 60 * 1000) -#define CGATT_TIMEOUT (180 * 1000) +#define CIMI_TIMEOUT ( 10 * 1000) /* Should be immediate, but have observed 3 seconds occassionally on u-blox and rarely longer times */ #define CMGS_TIMEOUT (150 * 1000) /* 180s for R4 (set to 150s to match previous implementation) */ #define COPS_TIMEOUT (180 * 1000) #define CPWROFF_TIMEOUT ( 40 * 1000) -#define CREG_TIMEOUT ( 60 * 1000) #define CSQ_TIMEOUT ( 10 * 1000) +#define CREG_TIMEOUT ( 60 * 1000) +#define CGREG_TIMEOUT ( 60 * 1000) +#define CEREG_TIMEOUT ( 60 * 1000) #define UBANDSEL_TIMEOUT ( 40 * 1000) #define UBANDMASK_TIMEOUT ( 10 * 1000) +#define UCGED_TIMEOUT ( 10 * 1000) #define UDNSRN_TIMEOUT ( 30 * 1000) /* 70s for R4 (set to 30s to match previous implementation) */ -#define URAT_TIMEOUT ( 10 * 1000) +#define UMNOPROF_TIMEOUT ( 10 * 1000) #define UPSD_TIMEOUT ( 10 * 1000) #define UPSDA_TIMEOUT (180 * 1000) #define UPSND_TIMEOUT ( 10 * 1000) -#define UMNOPROF_TIMEOUT ( 10 * 1000) -#define CEDRXS_TIMEOUT ( 10 * 1000) -#define CFUN_TIMEOUT (180 * 1000) -#define UCGED_TIMEOUT ( 10 * 1000) -#define CIMI_TIMEOUT ( 10 * 1000) /* Should be immediate, but have observed 3 seconds occassionally on u-blox and rarely longer times */ +#define URAT_TIMEOUT ( 10 * 1000) // num sockets #define NUMSOCKETS ((int)(sizeof(_sockets)/sizeof(*_sockets))) @@ -248,6 +249,10 @@ MDMParser::MDMParser(void) inst = this; memset(&_dev, 0, sizeof(_dev)); memset(&_net, 0, sizeof(_net)); + memset(_sockets, 0, sizeof(_sockets)); + for (int socket = 0; socket < NUMSOCKETS; socket ++) { + _sockets[socket].handle = MDM_SOCKET_ERROR; + } _net.cgi.location_area_code = 0xFFFF; _net.cgi.cell_id = 0xFFFFFFFF; _ip = NOIP; @@ -263,11 +268,13 @@ MDMParser::MDMParser(void) sms_cb = NULL; _timePowerOn = 0; _timeRegistered = 0; - memset(_sockets, 0, sizeof(_sockets)); - for (int socket = 0; socket < NUMSOCKETS; socket ++) - _sockets[socket].handle = MDM_SOCKET_ERROR; - _error = 0; + _lastVerboseC_regUpdate = 0; + _verboseCregEnabled = false; + _verboseCgregEnabled = false; + _verboseCeregEnabled = false; _lastProcess = 0; + _error = 0; + _reinitModem = 0xFF; #ifdef MDM_DEBUG _debugLevel = 3; #endif @@ -408,6 +415,7 @@ bool MDMParser::_checkModem(bool force /* = true */) { } int MDMParser::process() { + if (!(_init && _pwr)) { return 0; } @@ -418,34 +426,38 @@ int MDMParser::process() { { LOCK(); - if (!_checkModem(false /* force */)) { + if (_reinitModem == 0x55 || !_checkModem(false /* force */)) { + if (_reinitModem != 0x55) { + MDM_ERROR("[ Modem not responsive ]\r\n"); + } + _reinitModem = 0xFF; // We've notified system layer that we'll require a modem reset // This will get reset once the system (or something else) calls on() _cancel_all_operations = true; - // Short-circuit disconnect() call - _ip = NOIP; + _ip = NOIP; // Short-circuit disconnect() call _attached = false; _activated = false; HAL_NET_notify_disconnected(); - MDM_ERROR("[ Modem not responsive ]\r\n"); return -1; } } if ((HAL_Timer_Get_Milli_Seconds() - _lastProcess) >= MDM_URC_POLL_INTERVAL_MS) { LOCK(); - // Process URCs - waitFinalResp(nullptr, nullptr, 0); + waitFinalResp(nullptr, nullptr, 0); // Process URCs _lastProcess = HAL_Timer_Get_Milli_Seconds(); } if (_activated && !_attached) { LOCK(); if (!reconnect()) { - return -1; + return -2; } } + // Periodically check if C*REG URCs are still enabled, will cause modem re-init if not. + _checkVerboseC_reg(); + return 0; } @@ -501,7 +513,7 @@ int MDMParser::waitFinalResp(_CALLBACKPTR cb /* = NULL*/, // handle unsolicited commands here if (type == TYPE_PLUS) { const char* cmd = buf+3; // assume all TYPE_PLUS commands have \r\n+ and skip - int a, b, c, d, r; + int mode, a, b, c, d, r; char s[32]; // SMS Command --------------------------------- @@ -565,13 +577,14 @@ int MDMParser::waitFinalResp(_CALLBACKPTR cb /* = NULL*/, HAL_NET_notify_dhcp(false); } } else { - // +CREG|CGREG: ,[,,[,AcT[,]]] // reply to AT+CREG|AT+CGREG - // +CREG|CGREG: [,,[,AcT[,]]] // URC - b = (int)0xFFFF; c = (int)0xFFFFFFFF; d = -1; - r = sscanf(cmd, "%31s %*d,%d,\"%x\",\"%x\",%d",s,&a,&b,&c,&d); + // +CREG|CGREG: ,[,,[,AcT[,]]] // reply to AT+CREG|AT+CGREG (2,4,5,6 results) + // +CREG|CGREG: [,,[,AcT[,]]] // URC (1,3,4,5 results) + b = (int)0xFFFF; c = (int)0xFFFFFFFF; d = -1; mode = 2; + r = sscanf(cmd, "%31s %d,%d,\"%x\",\"%x\",%d",s,&mode,&a,&b,&c,&d); if (r <= 1) r = sscanf(cmd, "%31s %d,\"%x\",\"%x\",%d",s,&a,&b,&c,&d); if (r >= 2) { + if (r == 2) mode = 2; // Single URC , so fix mode that got overwritten in the first sscanf Reg *reg = !strcmp(s, "CREG:") ? &_net.csd : !strcmp(s, "CGREG:") ? &_net.psd : !strcmp(s, "CEREG:") ? &_net.eps : NULL; @@ -584,13 +597,32 @@ int MDMParser::waitFinalResp(_CALLBACKPTR cb /* = NULL*/, else if (a == 4) *reg = REG_UNKNOWN; // 4: unknown else if (a == 5) *reg = REG_ROAMING; // 5: registered, roaming - // R410 : Look out for a CEREG: de-registration URC while attached - if (_dev.dev == DEV_SARA_R410 && _attached && !REG_OK(*reg) && !strcmp(s, "CEREG:")) { - MDM_PRINTF("Cell de-registered\r\n"); - _attached_urc = 0; - _ip = NOIP; - _attached = false; - HAL_NET_notify_disconnected(); + _reinitModem = 0xFF; + if (mode == 0) { + if (_dev.dev == DEV_SARA_R410) { + if (_verboseCeregEnabled && !strcmp(s, "CEREG:")) { + _reinitModem = 0x55; + } + } else { + if (_verboseCregEnabled && !strcmp(s, "CREG:")) { + _reinitModem = 0x55; + } else if (_verboseCgregEnabled && !strcmp(s, "CGREG:")) { + _reinitModem = 0x55; + } + } + } + if (_reinitModem == 0x55) { + MDM_ERROR("[ Modem unexpectedly reset ]\r\n"); + _lastVerboseC_regUpdate = 0; + _verboseCregEnabled = false; + _verboseCgregEnabled = false; + _verboseCeregEnabled = false; + // reinit handled in MDMParser::process() + } else if (_dev.dev == DEV_SARA_R410 && _attached && !REG_OK(*reg) && !strcmp(s, "CEREG:")) { + // R410 : Look out for a CEREG: de-registration URC while attached + MDM_ERROR("[ Cell de-registered ]\r\n"); + _reinitModem = 0x55; + // reinit handled in MDMParser::process() } if ((r >= 3) && (b != (int)0xFFFF)) _net.cgi.location_area_code = b; @@ -752,6 +784,11 @@ void MDMParser::reset(void) _timeRegistered = 0; // Increment the state change counter _incModemStateChangeCount(); + // Reset our verbose C*REG variables + _lastVerboseC_regUpdate = 0; + _verboseCregEnabled = false; + _verboseCgregEnabled = false; + _verboseCeregEnabled = false; } bool MDMParser::_powerOn(void) @@ -1211,11 +1248,12 @@ bool MDMParser::powerState(void) const { bool MDMParser::powerOff(void) { LOCK(); + const char * POWER_OFF_MSG = "\r\n[ Modem::powerOff ]"; bool ok = false; bool continue_cancel = false; bool check_ri = false; if (_init && _pwr) { - MDM_INFO("\r\n[ Modem::powerOff ] = = = = = = = = = = = = = ="); + MDM_INFO("%s = = = = = = = = = = = = = =", POWER_OFF_MSG); if (_cancel_all_operations) { continue_cancel = true; resume(); // make sure we can use the AT parser @@ -1229,7 +1267,7 @@ bool MDMParser::powerOff(void) // to avoid hitting the 124 day memory housekeeping issue, AT+CPWROFF will // handle this delay automatically, or timeout after 40s. if (_memoryIssuePresent) { - MDM_INFO("\r\n[ Modem::powerOff ] Modem not responsive, waiting up to 30s to power off with PWR_UC..."); + MDM_INFO("%s Modem not responsive, waiting up to 30s to power off with PWR_UC...", POWER_OFF_MSG); system_tick_t now = HAL_Timer_Get_Milli_Seconds(); if (_timePowerOn == 0) { // fallback to max timeout of 30s to be safe @@ -1253,7 +1291,7 @@ bool MDMParser::powerOff(void) _timePowerOn = 0; } } - MDM_INFO("\r\n[ Modem::powerOff ] Modem not responsive, trying PWR_UC..."); + MDM_INFO("%s Modem not responsive, trying PWR_UC...", POWER_OFF_MSG); // Skip power off sequence if power is already off if (!powerState()) { break; @@ -1274,16 +1312,16 @@ bool MDMParser::powerOff(void) } break; } else if (RESP_ABORTED == ret) { - MDM_INFO("\r\n[ Modem::powerOff ] found ABORTED, retrying..."); + MDM_INFO("%s found ABORTED, retrying...", POWER_OFF_MSG); } else { - MDM_INFO("\r\n[ Modem::powerOff ] timeout, retrying..."); + MDM_INFO("%s timeout, retrying...", POWER_OFF_MSG); } } } } else { if (powerState()) { check_ri = true; - MDM_INFO("[ Modem::powerOff ] modem is on unexpectedly. Turning it off..."); + MDM_INFO("%s modem is on unexpectedly. Turning it off...", POWER_OFF_MSG); // Delay this long period so that the modem is fully on to accept the power-off sequence. HAL_Delay_Milliseconds(12000); @@ -1307,9 +1345,9 @@ bool MDMParser::powerOff(void) _pwr = false; } if (!power_state) { - MDM_INFO("\r\n[ Modem::powerOff ] took %lu ms", HAL_Timer_Get_Milli_Seconds() - t0); + MDM_INFO("%s took %lu ms", POWER_OFF_MSG, HAL_Timer_Get_Milli_Seconds() - t0); } else { - MDM_INFO("\r\n[ Modem::powerOff ] failed"); + MDM_INFO("%s failed", POWER_OFF_MSG); } } else { _pwr = false; @@ -1318,9 +1356,13 @@ bool MDMParser::powerOff(void) //_attached = false; } HAL_Delay_Milliseconds(1000); // give peace a chance - // Increment the state change counter to show that the modem has been powered off -> on + // Increment the state change counter to show that the modem has been powered on -> off if (!_pwr) { _incModemStateChangeCount(); + _lastVerboseC_regUpdate = 0; + _verboseCregEnabled = false; + _verboseCgregEnabled = false; + _verboseCeregEnabled = false; } // Close serial connection @@ -1427,6 +1469,38 @@ int MDMParser::_cbCCID(int type, const char* buf, int len, CStringHelper* str) return WAIT; } +void MDMParser::_checkVerboseC_reg(void) { + if (!_attached) { + return; + } + + if (_lastVerboseC_regUpdate && + (HAL_Timer_Get_Milli_Seconds() - _lastVerboseC_regUpdate) >= MDM_C_REG_POLL_INTERVAL_MS) + { + LOCK(); + if (!_checkModem()) { + return; + } + _lastVerboseC_regUpdate = HAL_Timer_Get_Milli_Seconds(); + // Check if C*REG URCs are enabled, handled in waitFinalResp() + if (_dev.dev == DEV_SARA_R410) { + if (_verboseCeregEnabled) { + sendFormated("AT+CEREG?\r\n"); + waitFinalResp(nullptr, nullptr, CEREG_TIMEOUT); + } + } else { + if (_verboseCregEnabled) { + sendFormated("AT+CREG?\r\n"); + waitFinalResp(nullptr, nullptr, CREG_TIMEOUT); + } + if (_verboseCgregEnabled) { + sendFormated("AT+CGREG?\r\n"); + waitFinalResp(nullptr, nullptr, CGREG_TIMEOUT); + } + } + } +} + bool MDMParser::_checkEpsReg(void) { // On the SARA R410M check EPS registration if (_dev.dev == DEV_SARA_R410) { @@ -1487,6 +1561,8 @@ bool MDMParser::registerNet(const char* apn, NetStatus* status, system_tick_t ti if (RESP_OK != waitFinalResp(nullptr, nullptr, CEREG_TIMEOUT)) { goto failure; } + _verboseCeregEnabled = true; + _lastVerboseC_regUpdate = HAL_Timer_Get_Milli_Seconds(); } else { // Set up the GPRS network registration URC sendFormated("AT+CGREG=2\r\n"); @@ -1498,6 +1574,9 @@ bool MDMParser::registerNet(const char* apn, NetStatus* status, system_tick_t ti if (RESP_OK != waitFinalResp(nullptr, nullptr, CREG_TIMEOUT)) { goto failure; } + _verboseCgregEnabled = true; + _verboseCregEnabled = true; + _lastVerboseC_regUpdate = HAL_Timer_Get_Milli_Seconds(); } if (!(ok = checkNetStatus())) { if (_dev.dev == DEV_SARA_R410) { diff --git a/hal/src/electron/modem/mdm_hal.h b/hal/src/electron/modem/mdm_hal.h index 80d4f51254..933983caef 100644 --- a/hal/src/electron/modem/mdm_hal.h +++ b/hal/src/electron/modem/mdm_hal.h @@ -553,6 +553,9 @@ class MDMParser static int _cbCPIN(int type, const char* buf, int len, Sim* sim); static int _cbCCID(int type, const char* buf, int len, CStringHelper* ccid); // network + static int _cbCREG(int type, const char* buf, int len, int* mode); + static int _cbCGREG(int type, const char* buf, int len, int* mode); + static int _cbCEREG(int type, const char* buf, int len, int* mode); static int _cbUMNOPROF(int type, const char *buf, int len, int* i); static int _cbCFUN(int type, const char* buf, int len, int* i); static int _cbUBANDMASK(int type, const char *buf, int len, uint64_t* i); @@ -648,6 +651,7 @@ class MDMParser int _checkAtResponse(void); bool _atOk(void); bool _checkModem(bool force = true); + void _checkVerboseC_reg(void); bool _checkEpsReg(void); int _socketError(void); static MDMParser* inst; @@ -661,6 +665,11 @@ class MDMParser volatile bool _cancel_all_operations; volatile uint32_t _error; system_tick_t _lastProcess; + system_tick_t _lastVerboseC_regUpdate; + bool _verboseCregEnabled; + bool _verboseCgregEnabled; + bool _verboseCeregEnabled; + int _reinitModem; #ifdef MDM_DEBUG int _debugLevel; void _debugPrint(int level, const char* color, const char* format, ...) __attribute__((format(printf, 4, 5))); diff --git a/user/tests/wiring/no_fixture_long_running/cellular.cpp b/user/tests/wiring/no_fixture_long_running/cellular.cpp index 242a5a0778..5680905b9d 100644 --- a/user/tests/wiring/no_fixture_long_running/cellular.cpp +++ b/user/tests/wiring/no_fixture_long_running/cellular.cpp @@ -114,7 +114,55 @@ test(CELLULAR_02_device_will_connect_to_the_cloud_when_all_udp_sockets_consumed) // Then the device overcomes this socket obstacle and connects to the Cloud assertEqual(Particle.connected(), true); } - #endif // !HAL_USE_SOCKET_HAL_POSIX +test(CELLULAR_03_device_will_reconnect_to_the_cloud_when_unexpected_modem_brown_out_occurs_within_2min) { +#if PLATFORM_ID != PLATFORM_ELECTRON + skip(); + return; +#endif + // Serial.println("the device will reconnect to the cloud when unexpected modem brown out occurs within 2min"); + // Given the device is currently connected to the Cloud + connect_to_cloud(6*60*1000); + // When the device modem suffers a simulated brown out (modem reset and powered back on with no initialization) + digitalWrite(RESET_UC, 0); + delay(10000); + digitalWrite(RESET_UC, 1); + do { + // SARA-U2/LISA-U2 50..80us + digitalWrite(PWR_UC, 0); delay(50); + digitalWrite(PWR_UC, 1); delay(10); + + // SARA-G35 >5ms, LISA-C2 > 150ms, LEON-G2 >5ms, SARA-R4 >= 150ms + digitalWrite(PWR_UC, 0); delay(150); + digitalWrite(PWR_UC, 1); delay(100); + } while (RESP_OK != Cellular.command(1000, "AT\r\n")); + // Then the device overcomes this brown out obstacle and reconnects to the Cloud within 2 minutes + system_tick_t start = millis(); + waitFor(Particle.disconnected, 60*1000); + assertEqual(Particle.disconnected, true); + waitFor(Particle.connected, 2*60*1000 - (millis() - start)); + assertEqual(Particle.connected(), true); +} + +test(CELLULAR_04_device_will_reconnect_to_the_cloud_when_unexpected_modem_reset_occurs_within_2min) { +#if PLATFORM_ID != PLATFORM_ELECTRON + skip(); + return; +#endif + // Serial.println("the device will reconnect to the cloud when unexpected modem reset occurs within 2min"); + // Given the device is currently connected to the Cloud + connect_to_cloud(6*60*1000); + // When the device modem suffers a simulated brown out (modem reset and powered back on with no initialization) + digitalWrite(RESET_UC, 0); + delay(10000); + digitalWrite(RESET_UC, 1); + // Then the device overcomes this reset obstacle and reconnects to the Cloud within 2 minutes + system_tick_t start = millis(); + waitFor(Particle.disconnected, 60*1000); + assertEqual(Particle.disconnected, true); + waitFor(Particle.connected, 2*60*1000 - (millis() - start)); + assertEqual(Particle.connected(), true); +} + #endif // Wiring_Cellular