Skip to content

Commit

Permalink
[XRT-LITE] add ability to configure NPU power mode
Browse files Browse the repository at this point in the history
  • Loading branch information
makslevental committed Oct 19, 2024
1 parent fad9629 commit f51c793
Show file tree
Hide file tree
Showing 9 changed files with 139 additions and 11 deletions.
21 changes: 20 additions & 1 deletion build_tools/ci/run_matmul_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,7 @@ fi
# Note: this will not actually show any devices because --xrt_lite_n_core_rows and --xrt_lite_n_core_cols are not passed,
# which I have omitted to make the conditional slightly more succinct.
if [[ $($IREE_INSTALL_DIR/bin/iree-benchmark-module --dump_devices | grep xrt-lite) ]]; then

$IREE_INSTALL_DIR/bin/iree-benchmark-module \
--module=$OUTPUT_DIR/mm_test1_bf16_f32_m64_n64_k64.vmfb \
--function=matmul_64x64_64xbf16_ \
Expand All @@ -804,7 +805,25 @@ if [[ $($IREE_INSTALL_DIR/bin/iree-benchmark-module --dump_devices | grep xrt-li
--device=xrt-lite \
--benchmark_repetitions=10 \
--xrt_lite_n_core_rows=$XRT_LITE_N_CORE_ROWS \
--xrt_lite_n_core_cols=$XRT_LITE_N_CORE_COLS
--xrt_lite_n_core_cols=$XRT_LITE_N_CORE_COLS \

# TURBO POWER!!!!!!!!!!!!!!!!!
# Re-run the same benchmark with --xrt_lite_power_mode=turbo, but only when
# sudo can be used without prompting (the shim reports that setting the power
# mode probably needs sudo privileges). `sudo -nv` fails instead of asking for
# a password, so CI never blocks here.
set +o pipefail
sudo -nv 2>&1 && has_sudo="true" || has_sudo="false"
set -o pipefail
# The variable must be expanded: the bare word `has_sudo` never equals "true",
# which made this branch unreachable.
if [[ "$has_sudo" == "true" ]]; then
  sudo $IREE_INSTALL_DIR/bin/iree-benchmark-module \
    --module=$OUTPUT_DIR/mm_test1_bf16_f32_m64_n64_k64.vmfb \
    --function=matmul_64x64_64xbf16_ \
    --input=64x64xbf16 \
    --input=64x64xbf16 \
    --device=xrt-lite \
    --benchmark_repetitions=10 \
    --xrt_lite_n_core_rows=$XRT_LITE_N_CORE_ROWS \
    --xrt_lite_n_core_cols=$XRT_LITE_N_CORE_COLS \
    --xrt_lite_power_mode=turbo
fi

fi

echo "$MATMUL_TESTS_RUN matmul tests run!"
Expand Down
2 changes: 2 additions & 0 deletions runtime/src/iree-amd-aie/driver/xrt-lite/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@
#ifndef IREE_AMD_AIE_DRIVER_XRT_LITE_API_H_
#define IREE_AMD_AIE_DRIVER_XRT_LITE_API_H_

#include "iree-amd-aie/driver/xrt-lite/shim/linux/kmq/amdxdna_accel.h"
#include "iree/base/api.h"
#include "iree/hal/api.h"

// Parameters passed to the iree_hal_xrt_lite_device constructor.
struct iree_hal_xrt_lite_device_params {
// Number of core rows to use on the NPU.
int32_t n_core_rows;
// Number of core columns to use on the NPU.
int32_t n_core_cols;
// Requested NPU power mode. The driver option parser accepts "default",
// "low", "medium", "high" or "turbo".
// NOTE(review): this is a string view, presumably non-owning, so the
// characters it refers to must outlive every reader of this field - confirm.
iree_string_view_t power_mode;
};

IREE_API_EXPORT void iree_hal_xrt_lite_device_options_initialize(
Expand Down
26 changes: 24 additions & 2 deletions runtime/src/iree-amd-aie/driver/xrt-lite/device.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,26 @@ iree_hal_xrt_lite_device::iree_hal_xrt_lite_device(

iree_hal_resource_initialize(&iree_hal_xrt_lite_device_vtable, &resource);
this->host_allocator = host_allocator;
shim_device =
new shim_xdna::device(options->n_core_rows, options->n_core_cols);
this->power_mode = options->power_mode;
if (iree_string_view_equal(power_mode, IREE_SV("default"))) {
shim_device = new shim_xdna::device(
options->n_core_rows, options->n_core_cols, POWER_MODE_DEFAULT);
} else if (iree_string_view_equal(power_mode, IREE_SV("low"))) {
shim_device = new shim_xdna::device(options->n_core_rows,
options->n_core_cols, POWER_MODE_LOW);
} else if (iree_string_view_equal(power_mode, IREE_SV("medium"))) {
shim_device = new shim_xdna::device(
options->n_core_rows, options->n_core_cols, POWER_MODE_MEDIUM);
} else if (iree_string_view_equal(power_mode, IREE_SV("high"))) {
shim_device = new shim_xdna::device(options->n_core_rows,
options->n_core_cols, POWER_MODE_HIGH);
} else if (iree_string_view_equal(power_mode, IREE_SV("turbo"))) {
shim_device = new shim_xdna::device(options->n_core_rows,
options->n_core_cols, POWER_MODE_TURBO);
} else {
shim_device =
new shim_xdna::device(options->n_core_rows, options->n_core_cols);
}

iree_status_t status = iree_hal_xrt_lite_allocator_create(
host_allocator, shim_device, &device_allocator);
Expand Down Expand Up @@ -202,6 +220,10 @@ static void iree_hal_xrt_lite_device_destroy(iree_hal_device_t* base_device) {
base_device, iree_hal_xrt_lite_device_vtable, iree_hal_xrt_lite_device);

iree_hal_allocator_release(device->device_allocator);
if (!iree_string_view_is_empty(device->power_mode) &&
!iree_string_view_equal(device->power_mode, IREE_SV("default"))) {
device->shim_device->set_power_mode(POWER_MODE_DEFAULT);
}
delete device->shim_device;
iree_allocator_free(device->host_allocator, device);

Expand Down
1 change: 1 addition & 0 deletions runtime/src/iree-amd-aie/driver/xrt-lite/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ struct iree_hal_xrt_lite_device {
// should come last; see the definition of total_size below in
// iree_hal_xrt_lite_device_create
iree_string_view_t identifier;
iree_string_view_t power_mode;

iree_hal_xrt_lite_device(const iree_hal_xrt_lite_device_params* options,
iree_allocator_t host_allocator);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,15 @@ IREE_FLAG(int32_t, xrt_lite_n_core_rows, 0,
"Number of core rows to use on NPU.");
// Command-line flag: number of NPU core columns.
IREE_FLAG(int32_t, xrt_lite_n_core_cols, 0,
"Number of core cols to use on NPU.");
// Command-line flag: NPU power mode. Option parsing in this file accepts
// default | low | medium | high | turbo; the empty default is not forwarded
// as a driver option.
// see shim/linux/kmq/amdxdna_accel.h#L460 for options
IREE_FLAG(string, xrt_lite_power_mode, "", "Set the power mode of the NPU.");

// Option keys under which the flag values above are added to the string pair
// builder and later matched when the device parameters are populated.
static const iree_string_view_t key_xrt_lite_n_core_rows =
iree_string_view_literal("xrt_lite_n_core_rows");
static const iree_string_view_t key_xrt_lite_n_core_cols =
iree_string_view_literal("xrt_lite_n_core_cols");
static const iree_string_view_t key_xrt_lite_power_mode =
iree_string_view_literal("xrt_lite_power_mode");

static iree_status_t iree_hal_xrt_lite_driver_factory_enumerate(
void* self, iree_host_size_t* out_driver_info_count,
Expand Down Expand Up @@ -46,6 +50,13 @@ static iree_status_t iree_hal_xrt_lite_driver_parse_flags(
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_string_pair_builder_add_int32(builder, key_xrt_lite_n_core_cols,
FLAG_xrt_lite_n_core_cols));
iree_string_view_t power_mode = IREE_SV(FLAG_xrt_lite_power_mode);
if (!iree_string_view_is_empty(power_mode)) {
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_string_pair_builder_add(
builder,
iree_make_string_pair(key_xrt_lite_power_mode, power_mode)));
}

IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
Expand All @@ -68,14 +79,14 @@ static iree_status_t iree_hal_xrt_lite_driver_populate_options(
IREE_TRACE_ZONE_END(z0);
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"Option 'key_xrt_lite_n_core_rows' expected to be int. Got: '%.*s'",
"Option 'xrt_lite_n_core_rows' expected to be int. Got: '%.*s'",
(int)value.size, value.data);
}
if (ivalue <= 0) {
IREE_TRACE_ZONE_END(z0);
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"Option 'key_xrt_lite_n_core_rows' expected to be > 0. Got: '%.*s'",
"Option 'xrt_lite_n_core_rows' expected to be > 0. Got: '%.*s'",
(int)value.size, value.data);
}
device_params->n_core_rows = ivalue;
Expand All @@ -84,20 +95,34 @@ static iree_status_t iree_hal_xrt_lite_driver_populate_options(
IREE_TRACE_ZONE_END(z0);
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"Option 'key_xrt_lite_n_core_cols' expected to be int. Got: '%.*s'",
"Option 'xrt_lite_n_core_cols' expected to be int. Got: '%.*s'",
(int)value.size, value.data);
}
if (ivalue <= 0) {
IREE_TRACE_ZONE_END(z0);
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"Option 'key_xrt_lite_n_core_cols' expected to be > 0. Got: '%.*s'",
"Option 'xrt_lite_n_core_cols' expected to be > 0. Got: '%.*s'",
(int)value.size, value.data);
}
device_params->n_core_cols = ivalue;
} else if (iree_string_view_equal(key, key_xrt_lite_power_mode)) {
if (!(iree_string_view_equal(value, IREE_SV("default")) ||
iree_string_view_equal(value, IREE_SV("low")) ||
iree_string_view_equal(value, IREE_SV("medium")) ||
iree_string_view_equal(value, IREE_SV("high")) ||
iree_string_view_equal(value, IREE_SV("turbo")))) {
IREE_TRACE_ZONE_END(z0);
return iree_make_status(
IREE_STATUS_FAILED_PRECONDITION,
"Option 'xrt_lite_power_mode' expected to be default | low | "
"medium | high | turbo. Got: '%.*s'",
(int)value.size, value.data);
}
device_params->power_mode = value;
} else {
IREE_TRACE_ZONE_END(z0);
return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
"Unrecognized options: %.*s", (int)key.size,
key.data);
}
Expand Down
52 changes: 52 additions & 0 deletions runtime/src/iree-amd-aie/driver/xrt-lite/shim/linux/kmq/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,15 @@ device::device(uint32_t n_rows, uint32_t n_cols)
SHIM_DEBUG("Created KMQ device n_rows %d n_cols %d", n_rows, n_cols);
}

// Constructs a KMQ device by delegating to the (n_rows, n_cols) constructor,
// then applies `power_mode` through set_power_mode() and logs the chosen mode.
device::device(uint32_t n_rows, uint32_t n_cols,
amdxdna_power_mode_type power_mode)
: device(n_rows, n_cols) {
// The power mode is applied only after the delegated constructor has run.
set_power_mode(power_mode);
SHIM_DEBUG("Created KMQ device n_rows %d n_cols %d with power_mode %s",
n_rows, n_cols,
stringify_amdxdna_power_mode_type(power_mode).c_str());
}

// Destructor: the body only emits a debug log line.
device::~device() { SHIM_DEBUG("Destroying KMQ device"); }

// Returns a const reference to the m_pdev member.
const pdev &device::get_pdev() const { return m_pdev; }
Expand Down Expand Up @@ -249,6 +258,31 @@ void device::write_aie_reg(uint16_t col, uint16_t row, uint32_t reg_addr,
m_pdev.ioctl(DRM_IOCTL_AMDXDNA_SET_STATE, &arg);
}

// Queries the driver for the NPU's current power mode.
//
// Issues DRM_IOCTL_AMDXDNA_GET_INFO with the DRM_AMDXDNA_GET_POWER_MODE
// parameter through m_pdev and returns the reported value converted to
// amdxdna_power_mode_type.
amdxdna_power_mode_type device::get_power_mode() const {
  // Value-initialize the payload so no indeterminate stack bytes are handed
  // to the kernel, or read back if the driver leaves part of it untouched.
  amdxdna_drm_get_power_mode state{};
  amdxdna_drm_get_info arg = {.param = DRM_AMDXDNA_GET_POWER_MODE,
                              .buffer_size = sizeof(state),
                              .buffer = reinterpret_cast<uintptr_t>(&state)};

  m_pdev.ioctl(DRM_IOCTL_AMDXDNA_GET_INFO, &arg);
  return static_cast<amdxdna_power_mode_type>(state.power_mode);
}

// Asks the driver to switch the NPU to power mode `mode`.
//
// Issues DRM_IOCTL_AMDXDNA_SET_STATE with the DRM_AMDXDNA_SET_POWER_MODE
// parameter directly on the device file descriptor. A failing ioctl is
// reported through shim_err together with errno; the error text notes that
// elevated privileges are probably required.
void device::set_power_mode(amdxdna_power_mode_type mode) const {
  // Value-initialize the payload so every byte other than power_mode (for
  // example any padding or reserved field) reaches the kernel as zero rather
  // than as stack garbage.
  amdxdna_drm_set_power_mode state{};
  state.power_mode = mode;
  amdxdna_drm_set_state arg = {.param = DRM_AMDXDNA_SET_POWER_MODE,
                               .buffer_size = sizeof(state),
                               .buffer = reinterpret_cast<uintptr_t>(&state)};
  if (::ioctl(m_pdev.m_dev_fd, DRM_IOCTL_AMDXDNA_SET_STATE, &arg) == -1) {
    shim_err(
        errno,
        "DRM_AMDXDNA_SET_POWER_MODE failed; probably you need sudo privileges");
  }
  SHIM_DEBUG("set power_mode to %s",
             stringify_amdxdna_power_mode_type(mode).c_str());
}

std::string read_sysfs(const std::string &filename) {
std::ifstream file(filename);
std::string line;
Expand All @@ -274,4 +308,22 @@ std::filesystem::path find_npu_device() {
shim_err(errno, "No npu device found");
}

// Maps a power mode enumerator to its upper-case display name ("DEFAULT",
// "LOW", "MEDIUM", "HIGH" or "TURBO"). Any other value is reported through
// llvm::report_fatal_error.
std::string stringify_amdxdna_power_mode_type(
    amdxdna_power_mode_type power_mode) {
  const char *label = nullptr;
  switch (power_mode) {
    case POWER_MODE_DEFAULT:
      label = "DEFAULT";
      break;
    case POWER_MODE_LOW:
      label = "LOW";
      break;
    case POWER_MODE_MEDIUM:
      label = "MEDIUM";
      break;
    case POWER_MODE_HIGH:
      label = "HIGH";
      break;
    case POWER_MODE_TURBO:
      label = "TURBO";
      break;
    default:
      break;
  }
  // No enumerator matched: same fatal diagnostic as before.
  if (label == nullptr) {
    llvm::report_fatal_error("unknown power mode");
  }
  return std::string(label);
}

} // namespace shim_xdna
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <filesystem>
#include <map>

#include "amdxdna_accel.h"
#include "fence.h"
#include "xrt_mem.h"

Expand All @@ -33,6 +34,7 @@ struct device {
uint32_t n_cols;

device(uint32_t n_rows, uint32_t n_cols);
device(uint32_t n_rows, uint32_t n_cols, amdxdna_power_mode_type power_mode);
~device();

std::unique_ptr<bo> import_bo(int ehdl) const;
Expand All @@ -58,12 +60,18 @@ struct device {
void write_aie_reg(uint16_t col, uint16_t row, uint32_t reg_addr,
uint32_t reg_val);

// TODO(max): hide amdxdna_accel enums so they don't leak
amdxdna_power_mode_type get_power_mode() const;
void set_power_mode(amdxdna_power_mode_type mode) const;

std::unique_ptr<fence_handle> create_fence(fence_handle::access_mode);
std::unique_ptr<fence_handle> import_fence(pid_t, int);
};

std::string read_sysfs(const std::string &filename);
std::filesystem::path find_npu_device();
// Returns the upper-case name of `power_mode` (e.g. "TURBO"); the definition
// raises a fatal error for a value that is not a known power mode.
std::string stringify_amdxdna_power_mode_type(
amdxdna_power_mode_type power_mode);

} // namespace shim_xdna

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <cassert>
#include <cstring>

#include "amdxdna_accel.h"
#include "bo.h"
#include "hwq.h"
#include "shim_debug.h"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

#include <map>

#include "amdxdna_accel.h"
#include "device.h"

namespace shim_xdna {
Expand Down Expand Up @@ -54,8 +53,7 @@ struct hw_ctx {
std::unique_ptr<hw_q> q, const std::vector<uint8_t> &pdi,
const std::string &cu_name, uint32_t n_rows, uint32_t n_cols);
hw_ctx(device &dev, const std::vector<uint8_t> &pdi,
const std::string &cu_name,
uint32_t n_rows, uint32_t n_cols,
const std::string &cu_name, uint32_t n_rows, uint32_t n_cols,
const std::map<std::string, uint32_t> &qos = {});
~hw_ctx();
// no copying
Expand Down

0 comments on commit f51c793

Please sign in to comment.