diff --git a/edgelet/Cargo.lock b/edgelet/Cargo.lock index 6f7fdef4389..deb08a1128b 100755 --- a/edgelet/Cargo.lock +++ b/edgelet/Cargo.lock @@ -1,13 +1,5 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -[[package]] -name = "aho-corasick" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "aho-corasick" version = "0.6.4" @@ -195,7 +187,7 @@ dependencies = [ "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "nom 3.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)", - "serde-hjson 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", + "serde-hjson 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)", "toml 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "yaml-rust 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1113,14 +1105,6 @@ name = "matches" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "memchr" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "memchr" version = "1.0.2" @@ -1466,36 +1450,39 @@ dependencies = [ [[package]] name = "regex" -version = "0.1.80" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", - "thread_local 0.2.7 
(registry+https://github.com/rust-lang/crates.io-index)", - "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "aho-corasick 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", + "utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "regex" -version = "0.2.11" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "aho-corasick 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)", "thread_local 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", "utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "regex-syntax" -version = "0.3.9" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] [[package]] name = "regex-syntax" -version = "0.5.6" +version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1575,13 +1562,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "serde-hjson" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "linked-hash-map 0.3.0 
(registry+https://github.com/rust-lang/crates.io-index)", "num-traits 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", "serde 0.8.23 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -1785,23 +1772,6 @@ dependencies = [ "unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "thread-id" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "thread_local" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "thread_local" version = "0.3.5" @@ -2114,11 +2084,6 @@ dependencies = [ "url 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "utf8-ranges" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "utf8-ranges" version = "1.0.0" @@ -2264,7 +2229,6 @@ dependencies = [ ] [metadata] -"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" "checksum aho-corasick 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "d6531d44de723825aa81398a6415283229725a00fa30713812ab9323faa82fc4" "checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" "checksum argon2rs 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "3f67b0b6a86dae6e67ff4ca2b6201396074996379fba2b92ff649126f37cb392" @@ 
-2338,7 +2302,6 @@ dependencies = [ "checksum linked-hash-map 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "70fb39025bc7cdd76305867c4eccf2f2dcf6e9a57f5b21a93e1c2d86cd03ec9e" "checksum log 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d4fcce5fa49cc693c312001daf1d13411c4a5283796bac1084299ea3e567113f" "checksum matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "100aabe6b8ff4e4a7e32c1c13523379802df0772b82466207ac25b013f193376" -"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" "checksum memchr 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "148fab2e51b4f1cfc66da2a7c32981d1d3c083a803978268bb11fe4b86925e7a" "checksum memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "796fba70e76612589ed2ce7f45282f5af869e0fdd7cc6199fa1aa1f1d591ba9d" "checksum memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0f9dc261e2b62d7a622bf416ea3c5245cdd5d9a7fcc428c0d06804dfce1775b3" @@ -2374,10 +2337,10 @@ dependencies = [ "checksum redox_syscall 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)" = "0d92eecebad22b767915e4d529f89f28ee96dbbf5a4810d2b844373f136417fd" "checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" "checksum redox_users 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "214a97e49be64fd2c86f568dd0cb2c757d2cc53de95b273b6ad0a1c908482f26" -"checksum regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)" = "4fd4ace6a8cf7860714a2c2280d6c1f7e6a413486c13298bbc86fd3da019402f" "checksum regex 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9329abc99e39129fcceabd24cf5d85b4671ef7c29c50e972bc5afe32438ec384" -"checksum regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = 
"f9ec002c35e86791825ed294b50008eea9ddfc8def4420124fbc6b08db834957" +"checksum regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "5bbbea44c5490a1e84357ff28b7d518b4619a159fed5d25f6c1de2d19cc42814" "checksum regex-syntax 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7d707a4fa2637f2dca2ef9fd02225ec7661fe01a53623c1e6515b6916511f7a7" +"checksum regex-syntax 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "dcfd8681eebe297b81d98498869d4aae052137651ad7b96822f09ceb690d0a96" "checksum remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3488ba1b9a2084d38645c4c08276a1752dcbf2c7130d74f1569681ad5d2799c5" "checksum rustc-demangle 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "11fb43a206a04116ffd7cfcf9bcb941f8eb6cc7ff667272246b0a1c74259a3cb" "checksum ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7" @@ -2389,7 +2352,7 @@ dependencies = [ "checksum security-framework-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ab01dfbe5756785b5b4d46e0289e5a18071dfa9a7c2b24213ea00b9ef9b665bf" "checksum serde 0.8.23 (registry+https://github.com/rust-lang/crates.io-index)" = "9dad3f759919b92c3068c696c15c3d17238234498bbdcc80f2c469606f948ac8" "checksum serde 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)" = "0e732ed5a5592c17d961555e3b552985baf98d50ce418b7b655f31f6ba7eb1b7" -"checksum serde-hjson 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7a2376ebb8976138927f48b49588ef73cde2f6591b8b3df22f4063e0f27b9bec" +"checksum serde-hjson 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)" = "0b833c5ad67d52ced5f5938b2980f32a9c1c5ef047f0b4fb3127e7a423c76153" "checksum serde_derive 1.0.43 (registry+https://github.com/rust-lang/crates.io-index)" = "aa113e5fc4b008a626ba2bbd41330b56c9987d667f79f7b243e5a2d03d91ed1c" "checksum serde_derive_internals 0.23.1 
(registry+https://github.com/rust-lang/crates.io-index)" = "9d30c4596450fd7bbda79ef15559683f9a79ac0193ea819db90000d7e1cae794" "checksum serde_json 1.0.27 (registry+https://github.com/rust-lang/crates.io-index)" = "59790990c5115d16027f00913e2e66de23a51f70422e549d2ad68c8c5f268f1c" @@ -2410,8 +2373,6 @@ dependencies = [ "checksum termcolor 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "adc4587ead41bf016f11af03e55a624c06568b5a19db4e90fde573d805074f83" "checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" "checksum textwrap 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c0b59b6b4b44d867f1370ef1bd91bfb262bf07bf0ae65c202ea2fbc16153b693" -"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03" -"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5" "checksum thread_local 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279ef31c19ededf577bfd12dfae728040a21f635b06a24cd670ff510edd38963" "checksum time 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "a15375f1df02096fb3317256ce2cee6a1f42fc84ea5ad5fc8c421cfe40c73098" "checksum tokio 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "6e93c78d23cc61aa245a8acd2c4a79c4d7fa7fb5c3ca90d5737029f043a84895" @@ -2442,7 +2403,6 @@ dependencies = [ "checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" "checksum url 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f808aadd8cfec6ef90e4a14eb46f24511824d1ac596b9682703c87056c8678b7" "checksum url_serde 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"74e7d099f1ee52f823d4bdd60c93c3602043c728f5db3b97bdb548467f7bddea" -"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f" "checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122" "checksum vcpkg 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7ed0f6789c8a85ca41bbc1c9d175422116a9869bd1cf31bb08e1493ecce60380" "checksum vec_map 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "887b5b631c2ad01628bbbaa7dd4c869f80d3186688f8d0b6f58774fbe324988c" diff --git a/edgelet/contrib/config/linux/config.yaml b/edgelet/contrib/config/linux/config.yaml index 6147a2382f8..60c1fd01578 100644 --- a/edgelet/contrib/config/linux/config.yaml +++ b/edgelet/contrib/config/linux/config.yaml @@ -144,6 +144,33 @@ agent: hostname: "" +############################################################################### +# Watchdog settings +############################################################################### +# +# The IoT edge daemon has a watchdog that periodically checks the health of the +# Edge Agent module and restarts it if it's down. +# +# max_retries - Configures the number of retry attempts that the IoT edge daemon +# should make for failed operations before failing with a fatal error. +# +# If this configuration is not specified, the daemon keeps retrying +# on errors and doesn't fail fatally. +# +# On a fatal failure, the daemon returns an exit code which +# signifies the kind of error encountered. Currently, the following +# error codes are returned by the daemon - +# +# 150 - Invalid Device ID specified. +# 151 - Invalid IoT hub configuration. +# 152 - Invalid SAS token used to call IoT hub. +# This could signal an invalid SAS key. +# 1 - All other errors. 
+############################################################################### + +#watchdog: +# max_retries: 2 + ############################################################################### # Connect settings ############################################################################### diff --git a/edgelet/contrib/config/linux/debian/config.yaml b/edgelet/contrib/config/linux/debian/config.yaml index d9383c37460..8c22f1964dd 100644 --- a/edgelet/contrib/config/linux/debian/config.yaml +++ b/edgelet/contrib/config/linux/debian/config.yaml @@ -144,6 +144,33 @@ agent: hostname: "" +############################################################################### +# Watchdog settings +############################################################################### +# +# The IoT edge daemon has a watchdog that periodically checks the health of the +# Edge Agent module and restarts it if it's down. +# +# max_retries - Configures the number of retry attempts that the IoT edge daemon +# should make for failed operations before failing with a fatal error. +# +# If this configuration is not specified, the daemon keeps retrying +# on errors and doesn't fail fatally. +# +# On a fatal failure, the daemon returns an exit code which +# signifies the kind of error encountered. Currently, the following +# error codes are returned by the daemon - +# +# 150 - Invalid Device ID specified. +# 151 - Invalid IoT hub configuration. +# 152 - Invalid SAS token used to call IoT hub. +# This could signal an invalid SAS key. +# 1 - All other errors. 
+############################################################################### + +#watchdog: +# max_retries: 2 + ############################################################################### # Connect settings ############################################################################### diff --git a/edgelet/contrib/config/windows/config.yaml b/edgelet/contrib/config/windows/config.yaml index ebbac3068fa..3c7cc5f42b9 100644 --- a/edgelet/contrib/config/windows/config.yaml +++ b/edgelet/contrib/config/windows/config.yaml @@ -144,6 +144,33 @@ agent: hostname: "" +############################################################################### +# Watchdog settings +############################################################################### +# +# The IoT edge daemon has a watchdog that periodically checks the health of the +# Edge Agent module and restarts it if it's down. +# +# max_retries - Configures the number of retry attempts that the IoT edge daemon +# should make for failed operations before failing with a fatal error. +# +# If this configuration is not specified, the daemon keeps retrying +# on errors and doesn't fail fatally. +# +# On a fatal failure, the daemon returns an exit code which +# signifies the kind of error encountered. Currently, the following +# error codes are returned by the daemon - +# +# 150 - Invalid Device ID specified. +# 151 - Invalid IoT hub configuration. +# 152 - Invalid SAS token used to call IoT hub. +# This could signal an invalid SAS key. +# 1 - All other errors. 
+############################################################################### + +#watchdog: +# max_retries: 2 + ############################################################################### # Connect settings ############################################################################### diff --git a/edgelet/edgelet-config/src/lib.rs b/edgelet/edgelet-config/src/lib.rs index 98a8f5e41e8..e7dac70c6b6 100644 --- a/edgelet/edgelet-config/src/lib.rs +++ b/edgelet/edgelet-config/src/lib.rs @@ -25,6 +25,7 @@ use sha2::{Digest, Sha256}; use url::Url; use edgelet_core::crypto::MemoryKey; +use edgelet_core::watchdog::RetryLimit; use edgelet_core::ModuleSpec; use edgelet_utils::log_failure; @@ -358,6 +359,18 @@ impl Certificates { } } +#[derive(Debug, Default, Deserialize, Serialize)] +pub struct WatchdogSettings { + #[serde(default)] + max_retries: RetryLimit, +} + +impl WatchdogSettings { + pub fn max_retries(&self) -> &RetryLimit { + &self.max_retries + } +} + #[derive(Debug, Deserialize, Serialize)] pub struct Settings { provisioning: Provisioning, @@ -368,6 +381,8 @@ pub struct Settings { homedir: PathBuf, moby_runtime: MobyRuntime, certificates: Option, + #[serde(default)] + watchdog: WatchdogSettings, } impl Settings @@ -432,6 +447,10 @@ where self.certificates.as_ref() } + pub fn watchdog(&self) -> &WatchdogSettings { + &self.watchdog + } + pub fn diff_with_cached(&self, path: &Path) -> bool { fn diff_with_cached_inner( cached_settings: &Settings, @@ -510,6 +529,7 @@ mod tests { use super::*; use config::{Config, File, FileFormat}; use edgelet_docker::DockerConfig; + use std::cmp::Ordering; use std::fs::File as FsFile; use std::io::Write; use tempdir::TempDir; @@ -830,4 +850,14 @@ mod tests { let create_options = settings.agent().config().create_options(); assert_eq!(create_options.hostname(), Some("VAluE3")); } + + #[test] + fn watchdog_settings_are_read() { + let settings = Settings::::new(Some(Path::new(GOOD_SETTINGS))); + println!("{:?}", settings); + 
assert!(settings.is_ok()); + let s = settings.unwrap(); + let watchdog_settings = s.watchdog(); + assert_eq!(watchdog_settings.max_retries().compare(3), Ordering::Equal); + } } diff --git a/edgelet/edgelet-config/test/linux/sample_settings.yaml b/edgelet/edgelet-config/test/linux/sample_settings.yaml index 7dcf7f2ef60..f51d1985c78 100644 --- a/edgelet/edgelet-config/test/linux/sample_settings.yaml +++ b/edgelet/edgelet-config/test/linux/sample_settings.yaml @@ -14,6 +14,9 @@ agent: auth: {} hostname: "localhost" +watchdog: + max_retries: 3 + # Sets the connection uris for clients connect: workload_uri: "http://localhost:8081" diff --git a/edgelet/edgelet-config/test/linux/sample_settings1.yaml b/edgelet/edgelet-config/test/linux/sample_settings1.yaml index bbac6df76b1..eda6e885a94 100644 --- a/edgelet/edgelet-config/test/linux/sample_settings1.yaml +++ b/edgelet/edgelet-config/test/linux/sample_settings1.yaml @@ -12,6 +12,9 @@ agent: auth: {} hostname: "localhost" +watchdog: + max_retries: 3 + # Sets the connection uris for clients connect: workload_uri: "http://localhost:8081" diff --git a/edgelet/edgelet-config/test/linux/sample_settings2.yaml b/edgelet/edgelet-config/test/linux/sample_settings2.yaml index 24f70b096f1..3530a4d4109 100644 --- a/edgelet/edgelet-config/test/linux/sample_settings2.yaml +++ b/edgelet/edgelet-config/test/linux/sample_settings2.yaml @@ -14,6 +14,9 @@ agent: auth: {} hostname: "localhost" +watchdog: + max_retries: 3 + # Sets the connection uris for clients connect: workload_uri: "http://localhost:8081" diff --git a/edgelet/edgelet-config/test/windows/sample_settings.yaml b/edgelet/edgelet-config/test/windows/sample_settings.yaml index 5c25c3d952a..35a21df0ba2 100644 --- a/edgelet/edgelet-config/test/windows/sample_settings.yaml +++ b/edgelet/edgelet-config/test/windows/sample_settings.yaml @@ -14,6 +14,9 @@ agent: auth: {} hostname: "localhost" +watchdog: + max_retries: 3 + # Sets the connection uris for clients connect: workload_uri: 
"http://localhost:8081" diff --git a/edgelet/edgelet-config/test/windows/sample_settings1.yaml b/edgelet/edgelet-config/test/windows/sample_settings1.yaml index 76071a9bca1..a19041ae917 100644 --- a/edgelet/edgelet-config/test/windows/sample_settings1.yaml +++ b/edgelet/edgelet-config/test/windows/sample_settings1.yaml @@ -12,6 +12,9 @@ agent: auth: {} hostname: "localhost" +watchdog: + max_retries: 3 + # Sets the connection uris for clients connect: workload_uri: "http://localhost:8081" diff --git a/edgelet/edgelet-config/test/windows/sample_settings2.yaml b/edgelet/edgelet-config/test/windows/sample_settings2.yaml index 0c848370cb8..9e381d145c5 100644 --- a/edgelet/edgelet-config/test/windows/sample_settings2.yaml +++ b/edgelet/edgelet-config/test/windows/sample_settings2.yaml @@ -14,6 +14,9 @@ agent: auth: {} hostname: "localhost" +watchdog: + max_retries: 3 + # Sets the connection uris for clients connect: workload_uri: "http://localhost:8081" diff --git a/edgelet/edgelet-core/src/lib.rs b/edgelet/edgelet-core/src/lib.rs index 95a881242a3..8d49741b342 100644 --- a/edgelet/edgelet-core/src/lib.rs +++ b/edgelet/edgelet-core/src/lib.rs @@ -34,6 +34,7 @@ pub use module::{ ModuleRuntimeErrorReason, ModuleRuntimeState, ModuleSpec, ModuleStatus, ModuleTop, RegistryOperation, RuntimeOperation, SystemInfo, }; +pub use watchdog::RetryLimit; pub use workload::WorkloadConfig; lazy_static! { diff --git a/edgelet/edgelet-core/src/watchdog.rs b/edgelet/edgelet-core/src/watchdog.rs index 5808a3078c1..c01d56f1760 100644 --- a/edgelet/edgelet-core/src/watchdog.rs +++ b/edgelet/edgelet-core/src/watchdog.rs @@ -1,11 +1,13 @@ // Copyright (c) Microsoft. All rights reserved. 
+use std::cmp::Ordering; use std::time::{Duration, Instant}; use failure::Fail; use futures::future::{self, Either, FutureResult}; use futures::Future; use log::{info, warn, Level}; +use serde_derive::{Deserialize, Serialize}; use tokio::prelude::*; use tokio::timer::Interval; @@ -26,9 +28,32 @@ const MODULE_GENERATIONID: &str = "IOTEDGE_MODULEGENERATIONID"; /// This is the frequency with which the watchdog checks for the status of the edge runtime module. const WATCHDOG_FREQUENCY_SECS: u64 = 60; +#[derive(Clone, Debug, Deserialize, Serialize)] +#[serde(untagged)] +pub enum RetryLimit { + Infinite, + Num(u32), +} + +impl RetryLimit { + pub fn compare(&self, right: u32) -> Ordering { + match self { + RetryLimit::Infinite => Ordering::Greater, + RetryLimit::Num(n) => n.cmp(&right), + } + } +} + +impl Default for RetryLimit { + fn default() -> Self { + RetryLimit::Infinite + } +} + pub struct Watchdog { runtime: M, id_mgr: I, + max_retries: RetryLimit, } impl Watchdog @@ -38,8 +63,12 @@ where ::Config: Clone, I: 'static + IdentityManager + Clone, { - pub fn new(runtime: M, id_mgr: I) -> Self { - Watchdog { runtime, id_mgr } + pub fn new(runtime: M, id_mgr: I, max_retries: RetryLimit) -> Self { + Watchdog { + runtime, + id_mgr, + max_retries, + } } // Start the edge runtime module (EdgeAgent). 
This also updates the identity of the module (module_id) @@ -59,8 +88,9 @@ where let name = spec.name().to_string(); let id_mgr = self.id_mgr.clone(); let module_id = module_id.to_string(); + let max_retries = self.max_retries.clone(); - let watchdog = start_watchdog(runtime, id_mgr, spec, module_id); + let watchdog = start_watchdog(runtime, id_mgr, spec, module_id, max_retries); // Swallow any errors from shutdown_signal let shutdown_signal = shutdown_signal.then(|_| Ok(())); @@ -100,6 +130,7 @@ pub fn start_watchdog( id_mgr: I, spec: ModuleSpec<::Config>, module_id: String, + max_retries: RetryLimit, ) -> impl Future where M: 'static + ModuleRuntime + Clone, @@ -110,9 +141,10 @@ where "Starting watchdog with {} second frequency...", WATCHDOG_FREQUENCY_SECS ); + Interval::new(Instant::now(), Duration::from_secs(WATCHDOG_FREQUENCY_SECS)) .map_err(|err| Error::from(err.context(ErrorKind::EdgeRuntimeStatusCheckerTimer))) - .for_each(move |_| { + .and_then(move |_| { info!("Checking edge runtime status"); check_runtime( runtime.clone(), @@ -120,12 +152,25 @@ where spec.clone(), module_id.clone(), ) + .and_then(|_| future::ok(None)) .or_else(|e| { warn!("Error in watchdog when checking for edge runtime status:"); log_failure(Level::Warn, &e); - future::ok(()) + future::ok(Some(e)) }) }) + .fold(0, move |exec_count: u32, result: Option| { + result + .and_then(|e| { + if max_retries.compare(exec_count) == Ordering::Greater { + Some(Ok(exec_count + 1)) + } else { + Some(Err(e)) + } + }) + .unwrap_or_else(|| Ok(0)) + }) + .map(|_| ()) } // Check if the edge runtime module is running, and if not, start it. 
diff --git a/edgelet/iotedged/src/error.rs b/edgelet/iotedged/src/error.rs index 5d64f9d3cb4..94efdf814ed 100644 --- a/edgelet/iotedged/src/error.rs +++ b/edgelet/iotedged/src/error.rs @@ -5,6 +5,12 @@ use std::fmt::Display; #[cfg(windows)] use std::sync::Mutex; +use edgelet_core::Error as CoreError; +use edgelet_core::ErrorKind as CoreErrorKind; +use edgelet_http::Error as HttpError; +use edgelet_http::ErrorKind as HttpErrorKind; +use iothubservice::Error as HubServiceError; + use failure::{Backtrace, Context, Fail}; #[cfg(windows)] use windows_service::Error as WindowsServiceError; @@ -25,6 +31,9 @@ pub enum ErrorKind { #[fail(display = "The daemon could not start up successfully: {}", _0)] Initialize(InitializeErrorReason), + #[fail(display = "Invalid signed token was provided.")] + InvalidSignedToken, + #[fail(display = "The management service encountered an error")] ManagementService, @@ -72,12 +81,74 @@ impl From for Error { } } +impl From for Error { + fn from(error: CoreError) -> Self { + let fail: &dyn Fail = &error; + let mut error_kind = ErrorKind::Watchdog; + + for cause in fail.iter_causes() { + if let Some(service_err) = cause.downcast_ref::() { + let hub_failure: &dyn Fail = service_err; + + for cause in hub_failure.iter_causes() { + if let Some(err) = cause.downcast_ref::() { + match HttpError::kind(err) { + HttpErrorKind::Http => { + error_kind = + ErrorKind::Initialize(InitializeErrorReason::InvalidHubConfig); + } + HttpErrorKind::HttpWithErrorResponse(code, _message) => { + if code.as_u16() == 401 { + error_kind = ErrorKind::InvalidSignedToken; + } + } + _ => {} + }; + + break; + } + } + + break; + } + } + + let error_kind_result = match error.kind() { + CoreErrorKind::EdgeRuntimeIdentityNotFound => { + ErrorKind::Initialize(InitializeErrorReason::InvalidDeviceConfig) + } + _ => error_kind, + }; + + Error::from(error.context(error_kind_result)) + } +} + impl From> for Error { fn from(inner: Context) -> Self { Error { inner } } } +impl 
From<&ErrorKind> for i32 { + fn from(err: &ErrorKind) -> Self { + match err { + // Using 150 as the starting base for custom IoT edge error codes so as to avoid + // collisions with - + // 1. The standard error codes defined by the BSD ecosystem + // (https://www.freebsd.org/cgi/man.cgi?query=sysexits&apropos=0&sektion=0&manpath=FreeBSD+11.2-stable&arch=default&format=html) + // that is recommended by the Rust docs + // (https://rust-lang-nursery.github.io/cli-wg/in-depth/exit-code.html) + // 2. Bash scripting exit codes with special meanings + // (http://www.tldp.org/LDP/abs/html/exitcodes.html) + ErrorKind::Initialize(InitializeErrorReason::InvalidDeviceConfig) => 150, + ErrorKind::Initialize(InitializeErrorReason::InvalidHubConfig) => 151, + ErrorKind::InvalidSignedToken => 152, + _ => 1, + } + } +} + #[derive(Clone, Copy, Debug, PartialEq)] pub enum InitializeErrorReason { CreateCertificateManager, @@ -90,6 +161,8 @@ pub enum InitializeErrorReason { EdgeRuntime, Hsm, HttpClient, + InvalidDeviceConfig, + InvalidHubConfig, InvalidProxyUri, InvalidSocketUri, IssuerCAExpiration, @@ -144,6 +217,14 @@ impl fmt::Display for InitializeErrorReason { InitializeErrorReason::HttpClient => write!(f, "Could not initialize HTTP client"), + InitializeErrorReason::InvalidDeviceConfig => { + write!(f, "Invalid device configuration was provided") + } + + InitializeErrorReason::InvalidHubConfig => { + write!(f, "Invalid IoT hub configuration was provided") + } + InitializeErrorReason::InvalidProxyUri => write!(f, "Invalid proxy URI"), InitializeErrorReason::InvalidSocketUri => write!(f, "Invalid socket URI"), diff --git a/edgelet/iotedged/src/lib.rs b/edgelet/iotedged/src/lib.rs index ea2611b39c0..4cac6bf5138 100644 --- a/edgelet/iotedged/src/lib.rs +++ b/edgelet/iotedged/src/lib.rs @@ -891,10 +891,14 @@ where ], )?; - let watchdog = Watchdog::new(runtime.clone(), id_man.clone()); + let watchdog = Watchdog::new( + runtime.clone(), + id_man.clone(), +
settings.watchdog().max_retries().clone(), + ); let runtime_future = watchdog .run_until(spec, EDGE_RUNTIME_MODULEID, shutdown.map_err(|_| ())) - .map_err(|err| Error::from(err.context(ErrorKind::Watchdog))); + .map_err(Error::from); Ok(runtime_future) } diff --git a/edgelet/iotedged/src/main.rs b/edgelet/iotedged/src/main.rs index 37c9382747a..e50d8222088 100644 --- a/edgelet/iotedged/src/main.rs +++ b/edgelet/iotedged/src/main.rs @@ -7,7 +7,7 @@ fn main() { if let Err(e) = iotedged::unix::run() { iotedged::logging::log_error(&e); - std::process::exit(1); + std::process::exit(i32::from(e.kind())); } } @@ -15,6 +15,6 @@ fn main() { fn main() { if let Err(e) = iotedged::windows::run() { iotedged::logging::log_error(&e); - std::process::exit(1); + std::process::exit(i32::from(e.kind())); } }