Skip to content

Commit

Permalink
nvme: Add support for Autonomous Power State Transition
Browse files Browse the repository at this point in the history
APST is an optional NVMe power-saving feature that allows devices
to automatically enter higher non-operational power states after a
certain amount of idle time, reducing the controller's overall power
consumption.

The feature configuration involves filling out the transition table,
which then needs to be sent to the controller in a data buffer.
Each table entry corresponds to one of the available power states
and contains two values: idle transition power state (ITPS) and
idle time prior to transition (ITPT). The first specifies the next
power state the controller should switch to, and the second specifies
the amount of idle time required before that switch.

Two sysctls are added: apst_itpt_factor, an integer by which the total
(entry plus exit) latency of a target power state is multiplied to
obtain its ITPT, and apst_max_latency, a maximum total latency in
microseconds above which higher power states are excluded from
autonomous transitions.

The default behavior is to keep the vendor settings.

Signed-off-by: Alexey Sukhoguzov <[email protected]>
  • Loading branch information
Alexey Sukhoguzov committed Oct 1, 2024
1 parent 865297c commit fb27132
Show file tree
Hide file tree
Showing 5 changed files with 142 additions and 0 deletions.
9 changes: 9 additions & 0 deletions share/man/man4/nvme.4
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,15 @@ Set to 0 to disable.
.It Va dev.nvme.0.int_coal_threshold
(R/W) Interrupt coalescing threshold in number of command completions.
Set to 0 to disable.
.It Va dev.nvme.0.apst_itpt_factor
(R/W) Autonomous Power State Transition factor by which the total latency of
the target power state is multiplied to obtain the idle time required before
switching to it.
Set to 0 to disable.
.It Va dev.nvme.0.apst_max_latency
(R/W) Maximum total latency in microseconds, beyond which autonomous
transitions to slower non-operational power states are prevented.
Set to 0 to disable.
.El
.Pp
The following queue pair-level sysctls are currently implemented.
Expand Down
22 changes: 22 additions & 0 deletions sys/dev/nvme/nvme_ctrlr.c
Original file line number Diff line number Diff line change
Expand Up @@ -887,6 +887,27 @@ nvme_ctrlr_configure_int_coalescing(struct nvme_controller *ctrlr)
ctrlr->int_coal_threshold, NULL, NULL);
}

/*
 * Read the APST loader tunables into the controller and, only when both of
 * them were supplied, program the autonomous power state transition table
 * from them.  Otherwise the controller keeps whatever it is already using.
 */
static void
nvme_ctrlr_configure_apst(struct nvme_controller *ctrlr)
{
	int factor_set, latency_set;

	/* Both values stay 0 when the corresponding tunable is not set. */
	ctrlr->apst_itpt_factor = 0;
	ctrlr->apst_max_latency = 0;

	factor_set = TUNABLE_INT_FETCH("hw.nvme.apst_itpt_factor",
	    &ctrlr->apst_itpt_factor);
	latency_set = TUNABLE_INT_FETCH("hw.nvme.apst_max_latency",
	    &ctrlr->apst_max_latency);

	/* Touch the controller only if the user provided both tunables. */
	if (factor_set != 0 && latency_set != 0) {
		nvme_ctrlr_cmd_set_apst(ctrlr, ctrlr->apst_itpt_factor,
		    ctrlr->apst_max_latency, NULL, NULL);
	}
}

static void
nvme_ctrlr_hmb_free(struct nvme_controller *ctrlr)
{
Expand Down Expand Up @@ -1139,6 +1160,7 @@ nvme_ctrlr_start(void *ctrlr_arg, bool resetting)

nvme_ctrlr_configure_aer(ctrlr);
nvme_ctrlr_configure_int_coalescing(ctrlr);
nvme_ctrlr_configure_apst(ctrlr);

for (i = 0; i < ctrlr->num_io_queues; i++)
nvme_io_qpair_enable(&ctrlr->ioq[i]);
Expand Down
59 changes: 59 additions & 0 deletions sys/dev/nvme/nvme_ctrlr_cmd.c
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,65 @@ nvme_ctrlr_cmd_set_interrupt_coalescing(struct nvme_controller *ctrlr,
0, 0, 0, 0, NULL, 0, cb_fn, cb_arg);
}

/*
 * Build the Autonomous Power State Transition (APST) table and send it to
 * the controller with a Set Features command, polling for its completion.
 *
 * itpt_factor	Multiplier applied to a target state's total (entry + exit)
 *		latency to derive the idle time before switching to it.
 * max_latency	Largest total latency, in microseconds, a power state may
 *		have and still be used as a transition target.
 * cb_fn	Optional callback, invoked with cb_arg and the completion
 *		once the command has finished.  It is not invoked on the
 *		early-return paths (APST unsupported or invalid NPSS).
 *
 * When either itpt_factor or max_latency is 0, an all-zero table is sent
 * together with cdw11 = 0, i.e. the feature is disabled.
 */
void
nvme_ctrlr_cmd_set_apst(struct nvme_controller *ctrlr, uint32_t itpt_factor,
    uint32_t max_latency, nvme_cb_fn_t cb_fn, void *cb_arg)
{
	struct nvme_completion_poll_status status;
	struct nvme_power_state *ps;
	uint64_t *entry, total_latency;
	uint32_t cdw11, itps, itpt;

	if (ctrlr->cdata.apsta == 0 || ctrlr->quirks & QUIRK_DISABLE_APST) {
		nvme_printf(ctrlr, "APST is not supported by the device\n");
		return;
	}

	/* The table holds 32 entries, so power state indices must be <= 31. */
	if (ctrlr->cdata.npss > 31) {
		nvme_printf(ctrlr, "invalid NPSS, APST is not set\n");
		return;
	}

	/* M_WAITOK allocations sleep rather than fail, so NULL is not checked. */
	entry = malloc(32 * sizeof(*entry), M_NVME, M_ZERO | M_WAITOK);

	cdw11 = (itpt_factor != 0 && max_latency != 0);
	if (cdw11 == 0)
		goto out;

	/*
	 * Walk from the highest power state down.  entry[n] describes where
	 * state n goes when idle; here the candidate target for state
	 * (itps - 1) is state itps.
	 */
	ps = ctrlr->cdata.power_state;
	for (itps = ctrlr->cdata.npss; itps > 0; --itps) {
		/* The power state to transition to shall be a NOPS. */
		if (!NVMEV(NVME_PWR_ST_NOPS, ps[itps].mps_nops)) {
			/* Skip it by reusing the target chosen for it. */
			entry[itps - 1] = entry[itps];
			continue;
		}

		/* Widen before adding so the sum cannot wrap in 32 bits. */
		total_latency = (uint64_t)ps[itps].enlat + ps[itps].exlat;
		if (total_latency > max_latency)
			continue;

		/*
		 * 24-bit ITPT field specifies time in milliseconds.  Round
		 * up: an ITPT of 0 disables the transition for this state,
		 * which must not happen merely because the computed idle
		 * time is below one millisecond.  total_latency fits in 32
		 * bits here (it is <= max_latency), so the product and the
		 * rounding addend cannot overflow 64 bits.
		 */
		itpt = MIN((total_latency * itpt_factor + 999) / 1000,
		    (1 << 24) - 1);
		entry[itps - 1] = htole64(itpt << 8 | itps << 3);
	}

out:
	status.done = 0;
	nvme_ctrlr_cmd_set_feature(ctrlr,
	    NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION, cdw11, 0, 0, 0, 0,
	    entry, 32 * sizeof(*entry), nvme_completion_poll_cb, &status);

	nvme_completion_poll(&status);
	if (nvme_completion_is_error(&status.cpl))
		nvme_printf(ctrlr, "nvme_ctrlr_cmd_set_apst failed!\n");

	free(entry, M_NVME);
	if (cb_fn != NULL)
		cb_fn(cb_arg, &status.cpl);
}

void
nvme_ctrlr_cmd_get_log_page(struct nvme_controller *ctrlr, uint8_t log_page,
uint32_t nsid, void *payload, uint32_t payload_size, nvme_cb_fn_t cb_fn,
Expand Down
8 changes: 8 additions & 0 deletions sys/dev/nvme/nvme_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ struct nvme_controller {
#define QUIRK_DISABLE_TIMEOUT 2 /* Disable broken completion timeout feature */
#define QUIRK_INTEL_ALIGNMENT 4 /* Pre NVMe 1.3 performance alignment */
#define QUIRK_AHCI 8 /* Attached via AHCI redirect */
#define QUIRK_DISABLE_APST 16 /* Disable broken APST feature */

bus_space_tag_t bus_tag;
bus_space_handle_t bus_handle;
Expand Down Expand Up @@ -272,6 +273,10 @@ struct nvme_controller {
/** interrupt coalescing threshold */
uint32_t int_coal_threshold;

/** Autonomous power state transition */
uint32_t apst_itpt_factor;
uint32_t apst_max_latency;

/** timeout period in seconds */
uint32_t admin_timeout_period;
uint32_t timeout_period;
Expand Down Expand Up @@ -362,6 +367,9 @@ void nvme_ctrlr_cmd_set_interrupt_coalescing(struct nvme_controller *ctrlr,
uint32_t threshold,
nvme_cb_fn_t cb_fn,
void *cb_arg);
/*
 * Program the Autonomous Power State Transition table of the controller.
 * itpt_factor scales a target state's total latency into its idle time
 * before transition; max_latency (microseconds) excludes slower states.
 * A value of 0 for either disables the feature.  cb_fn may be NULL.
 */
void nvme_ctrlr_cmd_set_apst(struct nvme_controller *ctrlr,
uint32_t itpt_factor, uint32_t max_latency,
nvme_cb_fn_t cb_fn, void *cb_arg);
void nvme_ctrlr_cmd_get_error_page(struct nvme_controller *ctrlr,
struct nvme_error_information_entry *payload,
uint32_t num_entries, /* 0 = max */
Expand Down
44 changes: 44 additions & 0 deletions sys/dev/nvme/nvme_sysctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,40 @@ nvme_sysctl_int_coal_threshold(SYSCTL_HANDLER_ARGS)
return (0);
}

/*
 * Sysctl handler for apst_itpt_factor: reports the current factor and, on a
 * write, stores the new one and reprograms the APST table with it.
 */
static int
nvme_sysctl_apst_itpt_factor(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller *ctrlr;
	uint32_t factor;
	int rc;

	ctrlr = arg1;
	factor = ctrlr->apst_itpt_factor;

	rc = sysctl_handle_int(oidp, &factor, 0, req);
	if (rc != 0)
		return (rc);

	/* A read-only request carries no new value; nothing to apply. */
	if (req->newptr == NULL)
		return (0);

	ctrlr->apst_itpt_factor = factor;
	nvme_ctrlr_cmd_set_apst(ctrlr, ctrlr->apst_itpt_factor,
	    ctrlr->apst_max_latency, NULL, NULL);

	return (0);
}

/*
 * Sysctl handler for apst_max_latency: reports the current latency limit
 * and, on a write, stores the new one and reprograms the APST table with it.
 */
static int
nvme_sysctl_apst_max_latency(SYSCTL_HANDLER_ARGS)
{
	struct nvme_controller *ctrlr;
	uint32_t limit;
	int rc;

	ctrlr = arg1;
	limit = ctrlr->apst_max_latency;

	rc = sysctl_handle_int(oidp, &limit, 0, req);
	if (rc != 0)
		return (rc);

	/* A read-only request carries no new value; nothing to apply. */
	if (req->newptr == NULL)
		return (0);

	ctrlr->apst_max_latency = limit;
	nvme_ctrlr_cmd_set_apst(ctrlr, ctrlr->apst_itpt_factor,
	    ctrlr->apst_max_latency, NULL, NULL);

	return (0);
}

static int
nvme_sysctl_timeout_period(SYSCTL_HANDLER_ARGS)
{
Expand Down Expand Up @@ -353,6 +387,16 @@ nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr)
nvme_sysctl_int_coal_threshold, "IU",
"Interrupt coalescing threshold");

SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, "apst_itpt_factor",
CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, ctrlr, 0,
nvme_sysctl_apst_itpt_factor, "IU",
"APST idle time prior to transition factor");

SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO, "apst_max_latency",
CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, ctrlr, 0,
nvme_sysctl_apst_max_latency, "IU",
"APST upper total latency limit (in microseconds)");

SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
"admin_timeout_period", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
&ctrlr->admin_timeout_period, 0, nvme_sysctl_timeout_period, "IU",
Expand Down

0 comments on commit fb27132

Please sign in to comment.