From eafbadc63b7f1c51184e561323922cb0032ccd5b Mon Sep 17 00:00:00 2001
From: Tokunori Ikegami
Date: Sun, 7 Jul 2024 15:08:20 +0900
Subject: [PATCH 1/3] ocp: add get-error-injection command

The set-error-injection command will be added separately.

Signed-off-by: Tokunori Ikegami
---
 plugins/ocp/ocp-nvme.c | 176 ++++++++++++++++++++++++++++++++++++++++-
 plugins/ocp/ocp-nvme.h |   1 +
 2 files changed, 176 insertions(+), 1 deletion(-)

diff --git a/plugins/ocp/ocp-nvme.c b/plugins/ocp/ocp-nvme.c
index 8a3731d875..9f6e0ed453 100644
--- a/plugins/ocp/ocp-nvme.c
+++ b/plugins/ocp/ocp-nvme.c
@@ -23,6 +23,7 @@
 #include "linux/types.h"
 #include "util/types.h"
 #include "nvme-print.h"
+#include "nvme-wrap.h"
 
 #include "ocp-smart-extended-log.h"
 #include "ocp-clear-features.h"
@@ -113,6 +114,88 @@ struct __packed feature_latency_monitor {
 	__u8 reserved[4083];
 };
 
+/* One error injection entry as laid out in the feature 0xC0 data buffer. */
+struct erri_entry {
+	union {
+		__u8 flags;
+		struct {
+			__u8 enable:1;
+			__u8 single:1;
+			__u8 rsvd2:6;
+		};
+	};
+	__u8 rsvd1;
+	__le16 type;
+	union {
+		__u8 specific[28];
+		struct {
+			__le16 nrtdp;
+			__u8 rsvd4[26];
+		};
+	};
+};
+
+#define ERRI_ENTRIES_MAX 127
+
+enum erri_type {
+	ERRI_TYPE_CPU_CTRL_HANG = 1,
+	ERRI_TYPE_NAND_HANG,
+	ERRI_TYPE_PLP_DEFECT,
+	ERRI_TYPE_LOGICAL_FIRMWARE_ERROR,
+	ERRI_TYPE_DRAM_CORRUPT_CRIT,
+	ERRI_TYPE_DRAM_CORRUPT_NON_CRIT,
+	ERRI_TYPE_NAND_CORRUPT,
+	ERRI_TYPE_SRAM_CORRUPT,
+	ERRI_TYPE_HW_MALFUNCTION,
+	ERRI_TYPE_NO_MORE_NAND_SPARES,
+	ERRI_TYPE_INCOMPLETE_SHUTDOWN,
+};
+
+/*
+ * Map an error injection type, passed in little-endian byte order as stored
+ * in struct erri_entry, to a printable name ("unknown" if not recognized).
+ */
+const char *erri_type_to_string(__le16 type)
+{
+	switch (le16_to_cpu(type)) {
+	case ERRI_TYPE_CPU_CTRL_HANG:
+		return "CPU/controller hang";
+	case ERRI_TYPE_NAND_HANG:
+		return "NAND hang";
+	case ERRI_TYPE_PLP_DEFECT:
+		return "PLP defect";
+	case ERRI_TYPE_LOGICAL_FIRMWARE_ERROR:
+		return "logical firmware error";
+	case ERRI_TYPE_DRAM_CORRUPT_CRIT:
+		return "DRAM corruption critical path";
+	case ERRI_TYPE_DRAM_CORRUPT_NON_CRIT:
+		return "DRAM corruption non-critical path";
+	case ERRI_TYPE_NAND_CORRUPT:
+		return "NAND corruption";
+	case ERRI_TYPE_SRAM_CORRUPT:
+		return "SRAM corruption";
+	case ERRI_TYPE_HW_MALFUNCTION:
+		return "HW malfunction";
+	case ERRI_TYPE_NO_MORE_NAND_SPARES:
+		return "no more NAND spares available";
+	case ERRI_TYPE_INCOMPLETE_SHUTDOWN:
+		return "incomplete shutdown";
+	default:
+		break;
+	}
+
+	return "unknown";
+}
+
+/* Get Features completion dword 0: number of error injection entries returned. */
+struct erri_get_cq_entry {
+	__u32 nume:7;
+	__u32 rsvd7:25;
+};
+
+static const char *sel = "[0-3]: current/default/saved/supported";
+static const char *no_uuid = "Skip UUID index search (UUID index not required for OCP 1.0)";
+
 static int ocp_print_C3_log_normal(struct nvme_dev *dev,
 				   struct ssd_latency_monitor_log *log_data)
 {
@@ -717,7 +800,6 @@ static int eol_plp_failure_mode_set(struct nvme_dev *dev, const __u32 nsid,
 		}
 	}
 
-
 	struct nvme_set_features_args args = {
 		.args_size = sizeof(args),
 		.fd = dev_fd(dev),
@@ -3699,3 +3781,95 @@ static int fw_activation_history_log(int argc, char **argv, struct command *cmd,
 {
 	return ocp_fw_activation_history_log(argc, argv, cmd, plugin);
 }
+
+/*
+ * Issue Get Features for the error injection feature (FID 0xC0) with the
+ * given select value and print the number of entries and every entry returned.
+ * When uuid is true the OCP UUID index is looked up and used for the command.
+ * Returns the nvme_cli_get_features() result; returns early with the UUID
+ * lookup result if no OCP UUID index is found, or -errno if allocation fails.
+ */
+static int error_injection_get(struct nvme_dev *dev, const __u8 sel, bool uuid)
+{
+	struct erri_get_cq_entry cq_entry;
+	int err;
+	int i;
+	const __u8 fid = 0xc0;
+
+	_cleanup_free_ struct erri_entry *entry = NULL;
+
+	struct nvme_get_features_args args = {
+		.result = (__u32 *)&cq_entry,
+		.args_size = sizeof(args),
+		.fd = dev_fd(dev),
+		.timeout = NVME_DEFAULT_IOCTL_TIMEOUT,
+		.sel = sel,
+		.data_len = sizeof(*entry) * ERRI_ENTRIES_MAX,
+		.fid = fid,
+	};
+
+	if (uuid) {
+		/* OCP 2.0 requires UUID index support */
+		err = ocp_get_uuid_index(dev, &args.uuidx);
+		if (err || !args.uuidx) {
+			nvme_show_error("ERROR: No OCP UUID index found");
+			return err;
+		}
+	}
+
+	entry = nvme_alloc(args.data_len);
+	if (!entry) {
+		nvme_show_error("malloc: %s", strerror(errno));
+		return -errno;
+	}
+	/* args was built before the buffer existed; attach it now */
+	args.data = entry;
+
+	err = nvme_cli_get_features(dev, &args);
+	if (!err) {
+		nvme_show_result("Number of Error Injections (feature: %#0*x): %#0*x (%s: %d)",
+				 fid ? 4 : 2, fid, cq_entry.nume ? 10 : 8, cq_entry.nume,
+				 nvme_select_to_string(sel), cq_entry.nume);
+		if (sel == NVME_GET_FEATURES_SEL_SUPPORTED)
+			nvme_show_select_result(fid, *args.result);
+		for (i = 0; i < cq_entry.nume; i++) {
+			printf("Entry: %d, Flags: %x (%s%s), Type: %x (%s), NRTDP: %d\n", i,
+			       entry[i].flags, entry[i].enable ? "Enabled" : "Disabled",
+			       entry[i].single ? ", Single instance" : "",
+			       le16_to_cpu(entry[i].type), erri_type_to_string(entry[i].type),
+			       le16_to_cpu(entry[i].nrtdp));
+		}
+	} else {
+		nvme_show_error("Could not get feature: %#0*x.", fid ? 4 : 2, fid);
+	}
+
+	return err;
+}
+
+/*
+ * "get-error-injection" command handler: parse --sel and --no-uuid, open the
+ * device and print its error injection entries.
+ */
+static int get_error_injection(int argc, char **argv, struct command *cmd, struct plugin *plugin)
+{
+	const char *desc = "Return set of error injection";
+	int err;
+	struct config {
+		__u8 sel;
+	};
+	struct config cfg = { 0 };
+
+	_cleanup_nvme_dev_ struct nvme_dev *dev = NULL;
+
+	OPT_ARGS(opts) = {
+		OPT_BYTE("sel", 's', &cfg.sel, sel),
+		OPT_FLAG("no-uuid", 'n', NULL, no_uuid),
+		OPT_END()
+	};
+
+	err = parse_and_open(&dev, argc, argv, desc, opts);
+	if (err)
+		return err;
+
+	return error_injection_get(dev, cfg.sel, !argconfig_parse_seen(opts, "no-uuid"));
+}
diff --git a/plugins/ocp/ocp-nvme.h b/plugins/ocp/ocp-nvme.h
index f468b22fda..c98c00bede 100644
--- a/plugins/ocp/ocp-nvme.h
+++ b/plugins/ocp/ocp-nvme.h
@@ -36,6 +36,7 @@ PLUGIN(NAME("ocp", "OCP cloud SSD extensions", OCP_PLUGIN_VERSION),
 		ENTRY("set-dssd-async-event-config", "Set DSSD Async Event Config", set_dssd_async_event_config)
 		ENTRY("get-dssd-async-event-config", "Get DSSD Async Event Config", get_dssd_async_event_config)
 		ENTRY("tcg-configuration-log", "Retrieve TCG Configuration Log Page", ocp_tcg_configuration_log)
+		ENTRY("get-error-injection", "Return set of error injection", get_error_injection)
 	)
 );
 
From 26fc51435624ba2e8365c2aa31e6de7e0323f329 Mon Sep 17 00:00:00 2001
From: Tokunori Ikegami
Date: Sun, 7 Jul 2024 16:36:07 +0900
Subject: [PATCH 2/3] doc: add ocp get-error-injection command

The set-error-injection command
will be added separately.

Signed-off-by: Tokunori Ikegami
---
 .../nvme-ocp-get-error-injection.txt          | 57 +++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 Documentation/nvme-ocp-get-error-injection.txt

diff --git a/Documentation/nvme-ocp-get-error-injection.txt b/Documentation/nvme-ocp-get-error-injection.txt
new file mode 100644
index 0000000000..8061e497bf
--- /dev/null
+++ b/Documentation/nvme-ocp-get-error-injection.txt
@@ -0,0 +1,57 @@
+nvme-ocp-get-error-injection(1)
+===============================
+
+NAME
+----
+nvme-ocp-get-error-injection - Return set of error injection
+
+SYNOPSIS
+--------
+[verse]
+'nvme ocp get-error-injection' <device> [--no-uuid | -n]
+			[--sel=<select> | -s <select>]
+
+DESCRIPTION
+-----------
+Return set of error injection.
+
+The <device> parameter is mandatory and must be an NVMe character device (ex: /dev/nvme0).
+
+This will only work on OCP compliant devices supporting this feature.
+Results for any other device are undefined.
+
+On success it returns 0, error code otherwise.
+
+OPTIONS
+-------
+-n::
+--no-uuid::
+	Do not try to automatically detect UUID index for this command (required
+	for old OCP 1.0 support)
+
+-s <select>::
+--sel=<select>::
+	Select (SEL): This field specifies which value of the attributes
+	to return in the provided data:
++
+[]
+|==================
+|Select|Description
+|0|Current
+|1|Default
+|2|Saved
+|3|Supported capabilities
+|4-7|Reserved
+|==================
+
+EXAMPLES
+--------
+* Has the program issue a get-error-injection command to retrieve the 0xC0 feature.
++
+------------
+# nvme ocp get-error-injection /dev/nvme0
+------------
+
+NVME
+----
+Part of the nvme-user suite.

From 59de7cef8514e67a876fac0d1971eb1ba2f5c3d2 Mon Sep 17 00:00:00 2001
From: Tokunori Ikegami
Date: Sun, 7 Jul 2024 16:52:44 +0900
Subject: [PATCH 3/3] completions: add ocp get-error-injection command

The set-error-injection command will be added separately.
Signed-off-by: Tokunori Ikegami
---
 completions/_nvme                   | 13 +++++++++++++
 completions/bash-nvme-completion.sh |  6 +++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/completions/_nvme b/completions/_nvme
index 33d30ebed4..4c51e94105 100644
--- a/completions/_nvme
+++ b/completions/_nvme
@@ -315,6 +315,18 @@ _nvme () {
 				_arguments '*:: :->subcmds'
 				_describe -t commands "nvme ocp tcg-configuration-log options" _ocp_tcg_configuration_log
 				;;
+			(get-error-injection)
+				local _get_error_injection
+				_get_error_injection=(
+				/dev/nvme':supply a device to use (required)'
+				--sel=':0-3: current/default/saved/supported:'
+				-s':alias for --sel'
+				--no-uuid':Skip UUID index search'
+				-n':alias for --no-uuid'
+				)
+				_arguments '*:: :->subcmds'
+				_describe -t commands "nvme ocp get-error-injection options" _get_error_injection
+				;;
 			(*)
 				_files
 				;;
@@ -2502,6 +2514,7 @@ _nvme () {
 			telemetry-string-log':Retrieve Telemetry string Log Page'
 			set-telemetry-profile':Set Telemetry Profile'
 			tcg-configuration-log':tcg configuration log'
+			get-error-injection':get error injection'
 		)
 		_arguments '*:: :->subcmds'
 		_describe -t commands "nvme ocp options" _ocp
diff --git a/completions/bash-nvme-completion.sh b/completions/bash-nvme-completion.sh
index e22609b8a0..63c76436ed 100644
--- a/completions/bash-nvme-completion.sh
+++ b/completions/bash-nvme-completion.sh
@@ -1478,6 +1478,9 @@ plugin_ocp_opts () {
 		"tcg-configuration-log")
 		opts+=" --output-file= -o"
 			;;
+		"get-error-injection")
+		opts+=" --sel= -s --no-uuid -n"
+			;;
 		"help")
 		opts+=$NO_OPTS
 			;;
@@ -1555,7 +1558,8 @@ _nvme_subcmds () {
 			vs-fw-activate-history device-capability-log \
 			set-dssd-power-state-feature get-dssd-power-state-feature \
 			telemetry-string-log set-telemetry-profile \
-			set-dssd-async-event-config get-dssd-async-event-config"
+			set-dssd-async-event-config get-dssd-async-event-config \
+			get-error-injection"
 	)
 
 	# Associative array mapping plugins to corresponding option completions