Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ocp-nvme: add get-error-injection command #2401

Merged
merged 3 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions Documentation/nvme-ocp-get-error-injection.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
nvme-ocp-get-error-injection(1)
===============================

NAME
----
nvme-ocp-get-error-injection - Return the set of error injections

SYNOPSIS
--------
[verse]
'nvme ocp get-error-injection' <device> [--no-uuid | -n]
[--sel=<select> | -s <select>]

DESCRIPTION
-----------
Return the set of error injections.

The <device> parameter is mandatory and must be an NVMe character device (ex: /dev/nvme0).

This will only work on OCP compliant devices supporting this feature.
Results for any other device are undefined.

On success it returns 0, error code otherwise.

OPTIONS
-------
-n::
--no-uuid::
Do not try to automatically detect UUID index for this command (required
for old OCP 1.0 support)

-s <select>::
--sel=<select>::
Select (SEL): This field specifies which value of the attributes
to return in the provided data:
+
[]
|==================
|Select|Description
|0|Current
|1|Default
|2|Saved
|3|Supported capabilities
|4-7|Reserved
|==================

EXAMPLES
--------
* Has the program issue a get-error-injection command to retrieve the error injection feature (Get Features, feature identifier 0xC0).
+
------------
# nvme ocp get-error-injection /dev/nvme0
------------

NVME
----
Part of the nvme-user suite.
13 changes: 13 additions & 0 deletions completions/_nvme
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,18 @@ _nvme () {
_arguments '*:: :->subcmds'
_describe -t commands "nvme ocp tcg-configuration-log options" _ocp_tcg_configuration_log
;;
(get-error-injection)
	# Completion candidates for "nvme ocp get-error-injection".
	# The --sel description lists exactly the four values 0-3, matching
	# the command's own help text and man page.
	local _get_error_injection
	_get_error_injection=(
	/dev/nvme':supply a device to use (required)'
	--sel=':0-3: current/default/saved/supported:'
	-s':alias for --sel'
	--no-uuid':Skip UUID index search'
	-n':alias for --no-uuid'
	)
	_arguments '*:: :->subcmds'
	_describe -t commands "nvme ocp get-error-injection options" _get_error_injection
	;;
(*)
_files
;;
Expand Down Expand Up @@ -2502,6 +2514,7 @@ _nvme () {
telemetry-string-log':Retrieve Telemetry string Log Page'
set-telemetry-profile':Set Telemetry Profile'
tcg-configuration-log':tcg configuration log'
get-error-injection':get error injection'
)
_arguments '*:: :->subcmds'
_describe -t commands "nvme ocp options" _ocp
Expand Down
6 changes: 5 additions & 1 deletion completions/bash-nvme-completion.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1478,6 +1478,9 @@ plugin_ocp_opts () {
"tcg-configuration-log")
opts+=" --output-file= -o"
;;
"get-error-injection")
opts+=" --sel= -s --no-uuid -n"
;;
"help")
opts+=$NO_OPTS
;;
Expand Down Expand Up @@ -1555,7 +1558,8 @@ _nvme_subcmds () {
vs-fw-activate-history device-capability-log \
set-dssd-power-state-feature get-dssd-power-state-feature \
telemetry-string-log set-telemetry-profile \
set-dssd-async-event-config get-dssd-async-event-config"
set-dssd-async-event-config get-dssd-async-event-config \
get-error-injection"
)

# Associative array mapping plugins to corresponding option completions
Expand Down
157 changes: 156 additions & 1 deletion plugins/ocp/ocp-nvme.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "linux/types.h"
#include "util/types.h"
#include "nvme-print.h"
#include "nvme-wrap.h"

#include "ocp-smart-extended-log.h"
#include "ocp-clear-features.h"
Expand Down Expand Up @@ -113,6 +114,82 @@ struct __packed feature_latency_monitor {
__u8 reserved[4083];
};

/*
 * One error injection entry as laid out in the data buffer that
 * error_injection_get() passes to the Get Features command.
 *
 * The first byte can be read whole through `flags` or bit by bit through
 * `enable` / `single`. The trailing 28 bytes can be read raw through
 * `specific` or through the overlay whose first field is `nrtdp`.
 */
struct erri_entry {
union {
/* Whole flags byte; printed as "Flags" by error_injection_get(). */
__u8 flags;
struct {
/* Printed as "Enabled" when set, "Disabled" when clear. */
__u8 enable:1;
/* Printed as ", Single instance" when set. */
__u8 single:1;
__u8 rsvd2:6;
};
};
__u8 rsvd1;
/* Injection type code; erri_type_to_string() turns it into a name. */
__le16 type;
union {
/* Raw view of the 28 bytes that follow the type. */
__u8 specific[28];
struct {
/*
 * Printed as "NRTDP".
 * NOTE(review): presumably "number of reads to trigger device panic"
 * from the OCP specification - confirm.
 */
__le16 nrtdp;
__u8 rsvd4[26];
};
};
};

/*
 * Number of struct erri_entry slots error_injection_get() allocates for the
 * feature data buffer. It equals the largest value the 7-bit `nume` field of
 * struct erri_get_cq_entry can hold.
 */
#define ERRI_ENTRIES_MAX 127

enum erri_type {
ERRI_TYPE_CPU_CTRL_HANG = 1,
ERRI_TYPE_NAND_HANG,
ERRI_TYPE_PLP_DEFECT,
ERRI_TYPE_LOGICAL_FIRMWARE_ERROR,
ERRI_TYPE_DRAM_CORRUPT_CRIT,
ERRI_TYPE_DRAM_CORRUPT_NON_CRIT,
ERRI_TYPE_NAND_CORRUPT,
ERRI_TYPE_SRAM_CORRUPT,
ERRI_TYPE_HW_MALFUNCTION,
ERRI_TYPE_NO_MORE_NAND_SPARES,
ERRI_TYPE_INCOMPLETE_SHUTDOWN,
};

/*
 * Translate an error injection type code into a human-readable name.
 *
 * @type: type code as stored in struct erri_entry.
 *
 * Returns a pointer to a constant string. Any code without a dedicated
 * enum erri_type entry (including 0) yields "unknown".
 *
 * NOTE(review): the parameter is typed __le16 but is compared directly with
 * the host-order enum constants - confirm whether big-endian hosts need a
 * conversion before the lookup.
 */
const char *erri_type_to_string(__le16 type)
{
	/* Indexed by enum erri_type; unlisted slots stay NULL. */
	static const char * const names[] = {
		[ERRI_TYPE_CPU_CTRL_HANG] = "CPU/controller hang",
		[ERRI_TYPE_NAND_HANG] = "NAND hang",
		[ERRI_TYPE_PLP_DEFECT] = "PLP defect",
		[ERRI_TYPE_LOGICAL_FIRMWARE_ERROR] = "logical firmware error",
		[ERRI_TYPE_DRAM_CORRUPT_CRIT] = "DRAM corruption critical path",
		[ERRI_TYPE_DRAM_CORRUPT_NON_CRIT] = "DRAM corruption non-critical path",
		[ERRI_TYPE_NAND_CORRUPT] = "NAND corruption",
		[ERRI_TYPE_SRAM_CORRUPT] = "SRAM corruption",
		[ERRI_TYPE_HW_MALFUNCTION] = "HW malfunction",
		[ERRI_TYPE_NO_MORE_NAND_SPARES] = "no more NAND spares available",
		[ERRI_TYPE_INCOMPLETE_SHUTDOWN] = "incomplete shutdown",
	};

	if (type < sizeof(names) / sizeof(names[0]) && names[type])
		return names[type];

	return "unknown";
}

/*
 * 32-bit result value of the error injection Get Features command.
 * error_injection_get() points the command's `result` at an instance of this
 * struct and reads `nume` back from it.
 */
struct erri_get_cq_entry {
/* Count of entries; reported as "Number of Error Injections" and used as the print-loop bound. */
__u32 nume:7;
__u32 rsvd7:25;
};

static const char *sel = "[0-3]: current/default/saved/supported";
static const char *no_uuid = "Skip UUID index search (UUID index not required for OCP 1.0)";

static int ocp_print_C3_log_normal(struct nvme_dev *dev,
struct ssd_latency_monitor_log *log_data)
{
Expand Down Expand Up @@ -717,7 +794,6 @@ static int eol_plp_failure_mode_set(struct nvme_dev *dev, const __u32 nsid,
}
}


struct nvme_set_features_args args = {
.args_size = sizeof(args),
.fd = dev_fd(dev),
Expand Down Expand Up @@ -3699,3 +3775,82 @@ static int fw_activation_history_log(int argc, char **argv, struct command *cmd,
{
return ocp_fw_activation_history_log(argc, argv, cmd, plugin);
}

static int error_injection_get(struct nvme_dev *dev, const __u8 sel, bool uuid)
{
struct erri_get_cq_entry cq_entry;
int err;
int i;
const __u8 fid = 0xc0;

_cleanup_free_ struct erri_entry *entry = NULL;

struct nvme_get_features_args args = {
.result = (__u32 *)&cq_entry,
.data = entry,
.args_size = sizeof(args),
.fd = dev_fd(dev),
.timeout = NVME_DEFAULT_IOCTL_TIMEOUT,
.sel = sel,
.data_len = sizeof(*entry) * ERRI_ENTRIES_MAX,
.fid = fid,
};

if (uuid) {
/* OCP 2.0 requires UUID index support */
err = ocp_get_uuid_index(dev, &args.uuidx);
if (err || !args.uuidx) {
nvme_show_error("ERROR: No OCP UUID index found");
return err;
}
}

entry = nvme_alloc(args.data_len);
if (!entry) {
nvme_show_error("malloc: %s", strerror(errno));
return -errno;
}

err = nvme_cli_get_features(dev, &args);
if (!err) {
nvme_show_result("Number of Error Injecttions (feature: %#0*x): %#0*x (%s: %d)",
fid ? 4 : 2, fid, cq_entry.nume ? 10 : 8, cq_entry.nume,
nvme_select_to_string(sel), cq_entry.nume);
if (sel == NVME_GET_FEATURES_SEL_SUPPORTED)
nvme_show_select_result(fid, *args.result);
for (i = 0; i < cq_entry.nume; i++) {
printf("Entry: %d, Flags: %x (%s%s), Type: %x (%s), NRTDP: %d\n", i,
entry->flags, entry->enable ? "Enabled" : "Disabled",
entry->single ? ", Single instance" : "", entry->type,
erri_type_to_string(entry->type), entry->nrtdp);
}
} else {
nvme_show_error("Could not get feature: %#0*x.", fid ? 4 : 2, fid);
}

return err;
}

/*
 * Command handler for "get-error-injection": parses the command line, opens
 * the device and hands over to error_injection_get().
 *
 * The UUID index lookup is requested unless "no-uuid" was seen on the
 * command line. Returns the parse/open error if there is one, otherwise the
 * result of error_injection_get().
 */
static int get_error_injection(int argc, char **argv, struct command *cmd, struct plugin *plugin)
{
	const char *desc = "Return set of error injection";
	struct config {
		__u8 sel;
	} cfg = { 0 };
	_cleanup_nvme_dev_ struct nvme_dev *dev = NULL;
	bool skip_uuid;
	int ret;

	OPT_ARGS(opts) = {
		OPT_BYTE("sel", 's', &cfg.sel, sel),
		OPT_FLAG("no-uuid", 'n', NULL, no_uuid),
		OPT_END()
	};

	ret = parse_and_open(&dev, argc, argv, desc, opts);
	if (ret != 0)
		return ret;

	skip_uuid = argconfig_parse_seen(opts, "no-uuid");

	return error_injection_get(dev, cfg.sel, !skip_uuid);
}
1 change: 1 addition & 0 deletions plugins/ocp/ocp-nvme.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ PLUGIN(NAME("ocp", "OCP cloud SSD extensions", OCP_PLUGIN_VERSION),
ENTRY("set-dssd-async-event-config", "Set DSSD Async Event Config", set_dssd_async_event_config)
ENTRY("get-dssd-async-event-config", "Get DSSD Async Event Config", get_dssd_async_event_config)
ENTRY("tcg-configuration-log", "Retrieve TCG Configuration Log Page", ocp_tcg_configuration_log)
ENTRY("get-error-injection", "Return set of error injection", get_error_injection)
)
);

Expand Down
Loading