Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Direct IO Support #10018

Merged
merged 1 commit into from
Sep 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 25 additions & 5 deletions cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ get_usage(zpool_help_t idx)
return (gettext("\tstatus [--power] [-j [--json-int, "
"--json-flat-vdevs, ...\n"
"\t --json-pool-key-guid]] [-c [script1,script2,...]] "
"[-DegiLpPstvx] ...\n"
"[-dDegiLpPstvx] ...\n"
"\t [-T d|u] [pool] [interval [count]]\n"));
case HELP_UPGRADE:
return (gettext("\tupgrade\n"
Expand Down Expand Up @@ -2602,6 +2602,7 @@ typedef struct status_cbdata {
boolean_t cb_print_unhealthy;
boolean_t cb_print_status;
boolean_t cb_print_slow_ios;
boolean_t cb_print_dio_verify;
boolean_t cb_print_vdev_init;
boolean_t cb_print_vdev_trim;
vdev_cmd_data_list_t *vcdl;
Expand Down Expand Up @@ -2879,7 +2880,7 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
uint_t c, i, vsc, children;
pool_scan_stat_t *ps = NULL;
vdev_stat_t *vs;
char rbuf[6], wbuf[6], cbuf[6];
char rbuf[6], wbuf[6], cbuf[6], dbuf[6];
char *vname;
uint64_t notpresent;
spare_cbdata_t spare_cb;
Expand Down Expand Up @@ -2997,6 +2998,17 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
printf(" %5s", "-");
}
}
if (VDEV_STAT_VALID(vs_dio_verify_errors, vsc) &&
cb->cb_print_dio_verify) {
zfs_nicenum(vs->vs_dio_verify_errors, dbuf,
sizeof (dbuf));

if (cb->cb_literal)
printf(" %5llu",
(u_longlong_t)vs->vs_dio_verify_errors);
else
printf(" %5s", dbuf);
}
}

if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
Expand Down Expand Up @@ -10873,6 +10885,10 @@ status_callback(zpool_handle_t *zhp, void *data)
printf_color(ANSI_BOLD, " %5s", gettext("POWER"));
}

if (cbp->cb_print_dio_verify) {
printf_color(ANSI_BOLD, " %5s", gettext("DIO"));
}

if (cbp->vcdl != NULL)
print_cmd_columns(cbp->vcdl, 0);

Expand Down Expand Up @@ -10921,10 +10937,11 @@ status_callback(zpool_handle_t *zhp, void *data)
}

/*
* zpool status [-c [script1,script2,...]] [-DegiLpPstvx] [--power] [-T d|u] ...
* [pool] [interval [count]]
* zpool status [-c [script1,script2,...]] [-dDegiLpPstvx] [--power] ...
* [-T d|u] [pool] [interval [count]]
*
* -c CMD For each vdev, run command CMD
* -d Display Direct I/O write verify errors
* -D Display dedup status (undocumented)
* -e Display only unhealthy vdevs
* -g Display guid for individual vdev name.
Expand Down Expand Up @@ -10967,7 +10984,7 @@ zpool_do_status(int argc, char **argv)
};

/* check options */
while ((c = getopt_long(argc, argv, "c:jDegiLpPstT:vx", long_options,
while ((c = getopt_long(argc, argv, "c:jdDegiLpPstT:vx", long_options,
NULL)) != -1) {
switch (c) {
case 'c':
Expand All @@ -10994,6 +11011,9 @@ zpool_do_status(int argc, char **argv)
}
cmd = optarg;
break;
case 'd':
cb.cb_print_dio_verify = B_TRUE;
break;
case 'D':
if (++cb.cb_dedup_stats > 2)
cb.cb_dedup_stats = 2;
Expand Down
46 changes: 37 additions & 9 deletions cmd/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -2262,6 +2262,13 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap)
if (ztest_random(4) != 0) {
int prefetch = ztest_random(2) ?
DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH;

/*
* Randomly add DMU_DIRECTIO so that some reads are issued as Direct I/O.
*/
if (ztest_random(4) == 0)
prefetch |= DMU_DIRECTIO;

ztest_block_tag_t rbt;

VERIFY(dmu_read(os, lr->lr_foid, offset,
Expand Down Expand Up @@ -2813,6 +2820,13 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
enum ztest_io_type io_type;
uint64_t blocksize;
void *data;
uint32_t dmu_read_flags = DMU_READ_NO_PREFETCH;

/*
* Randomly add DMU_DIRECTIO so that some reads are issued as Direct I/O.
*/
if (ztest_random(4) == 0)
dmu_read_flags |= DMU_DIRECTIO;

VERIFY0(dmu_object_info(zd->zd_os, object, &doi));
blocksize = doi.doi_data_block_size;
Expand Down Expand Up @@ -2878,7 +2892,7 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
(void) pthread_rwlock_unlock(&ztest_name_lock);

VERIFY0(dmu_read(zd->zd_os, object, offset, blocksize, data,
DMU_READ_NO_PREFETCH));
dmu_read_flags));

(void) ztest_write(zd, object, offset, blocksize, data);
break;
Expand Down Expand Up @@ -5045,6 +5059,13 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id)
uint64_t stride = 123456789ULL;
uint64_t width = 40;
int free_percent = 5;
uint32_t dmu_read_flags = DMU_READ_PREFETCH;

/*
* Randomly add DMU_DIRECTIO so that some reads are issued as Direct I/O.
*/
if (ztest_random(4) == 0)
dmu_read_flags |= DMU_DIRECTIO;

/*
* This test uses two objects, packobj and bigobj, that are always
Expand Down Expand Up @@ -5123,10 +5144,10 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id)
* Read the current contents of our objects.
*/
error = dmu_read(os, packobj, packoff, packsize, packbuf,
DMU_READ_PREFETCH);
dmu_read_flags);
ASSERT0(error);
error = dmu_read(os, bigobj, bigoff, bigsize, bigbuf,
DMU_READ_PREFETCH);
dmu_read_flags);
ASSERT0(error);

/*
Expand Down Expand Up @@ -5244,9 +5265,9 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id)
void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);

VERIFY0(dmu_read(os, packobj, packoff,
packsize, packcheck, DMU_READ_PREFETCH));
packsize, packcheck, dmu_read_flags));
VERIFY0(dmu_read(os, bigobj, bigoff,
bigsize, bigcheck, DMU_READ_PREFETCH));
bigsize, bigcheck, dmu_read_flags));

ASSERT0(memcmp(packbuf, packcheck, packsize));
ASSERT0(memcmp(bigbuf, bigcheck, bigsize));
Expand Down Expand Up @@ -5336,6 +5357,13 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
dmu_buf_t *bonus_db;
arc_buf_t **bigbuf_arcbufs;
dmu_object_info_t doi;
uint32_t dmu_read_flags = DMU_READ_PREFETCH;

/*
* Randomly add DMU_DIRECTIO so that some reads are issued as Direct I/O.
*/
if (ztest_random(4) == 0)
dmu_read_flags |= DMU_DIRECTIO;

size = sizeof (ztest_od_t) * OD_ARRAY_SIZE;
od = umem_alloc(size, UMEM_NOFAIL);
Expand Down Expand Up @@ -5466,10 +5494,10 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
*/
if (i != 0 || ztest_random(2) != 0) {
error = dmu_read(os, packobj, packoff,
packsize, packbuf, DMU_READ_PREFETCH);
packsize, packbuf, dmu_read_flags);
ASSERT0(error);
error = dmu_read(os, bigobj, bigoff, bigsize,
bigbuf, DMU_READ_PREFETCH);
bigbuf, dmu_read_flags);
ASSERT0(error);
}
compare_and_update_pbbufs(s, packbuf, bigbuf, bigsize,
Expand Down Expand Up @@ -5529,9 +5557,9 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);

VERIFY0(dmu_read(os, packobj, packoff,
packsize, packcheck, DMU_READ_PREFETCH));
packsize, packcheck, dmu_read_flags));
VERIFY0(dmu_read(os, bigobj, bigoff,
bigsize, bigcheck, DMU_READ_PREFETCH));
bigsize, bigcheck, dmu_read_flags));

ASSERT0(memcmp(packbuf, packcheck, packsize));
ASSERT0(memcmp(bigbuf, bigcheck, bigsize));
Expand Down
179 changes: 179 additions & 0 deletions config/kernel-get-user-pages.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
dnl #
dnl # get_user_pages_unlocked() function was not available till 4.0.
dnl # In earlier kernels (< 4.0) get_user_pages() is available.
dnl #
dnl # 4.0 API change,
dnl # long get_user_pages_unlocked(struct task_struct *tsk,
dnl # struct mm_struct *mm, unsigned long start, unsigned long nr_pages,
dnl # int write, int force, struct page **pages)
dnl #
dnl # 4.8 API change,
dnl # long get_user_pages_unlocked(unsigned long start,
dnl # unsigned long nr_pages, int write, int force, struct page **page)
dnl #
dnl # 4.9 API change,
dnl # long get_user_pages_unlocked(unsigned long start,
dnl # unsigned long nr_pages, struct page **pages, unsigned int gup_flags)
dnl #
bwatkinson marked this conversation as resolved.
Show resolved Hide resolved

dnl #
dnl # Check available get_user_pages/_unlocked interfaces.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_GET_USER_PAGES], [
dnl #
dnl # Declares five compile-test snippets, one per candidate signature of
dnl # get_user_pages_unlocked()/get_user_pages(). Each snippet includes
dnl # only <linux/mm.h> and makes a single call with a specific argument
dnl # list. The snippet names given here are the same names passed to
dnl # ZFS_LINUX_TEST_RESULT in ZFS_AC_KERNEL_GET_USER_PAGES.
dnl #
dnl # Probe: get_user_pages_unlocked(start, nr_pages, pages, gup_flags)
dnl #
ZFS_LINUX_TEST_SRC([get_user_pages_unlocked_gup_flags], [
#include <linux/mm.h>
], [
unsigned long start = 0;
unsigned long nr_pages = 1;
unsigned int gup_flags = 0;
struct page **pages = NULL;
long ret __attribute__ ((unused));

ret = get_user_pages_unlocked(start, nr_pages, pages,
gup_flags);
])

dnl #
dnl # Probe: get_user_pages_unlocked(start, nr_pages, write, force, pages)
dnl #
ZFS_LINUX_TEST_SRC([get_user_pages_unlocked_write_flag], [
#include <linux/mm.h>
], [
unsigned long start = 0;
unsigned long nr_pages = 1;
int write = 0;
int force = 0;
long ret __attribute__ ((unused));
struct page **pages = NULL;

ret = get_user_pages_unlocked(start, nr_pages, write, force,
pages);
])

dnl #
dnl # Probe: get_user_pages_unlocked(tsk, mm, start, nr_pages, write,
dnl # force, pages)
dnl #
ZFS_LINUX_TEST_SRC([get_user_pages_unlocked_task_struct], [
#include <linux/mm.h>
], [
struct task_struct *tsk = NULL;
struct mm_struct *mm = NULL;
unsigned long start = 0;
unsigned long nr_pages = 1;
int write = 0;
int force = 0;
struct page **pages = NULL;
long ret __attribute__ ((unused));

ret = get_user_pages_unlocked(tsk, mm, start, nr_pages, write,
force, pages);
])

dnl #
dnl # Probe: get_user_pages_unlocked(tsk, mm, start, nr_pages, pages,
dnl # gup_flags)
dnl #
ZFS_LINUX_TEST_SRC([get_user_pages_unlocked_task_struct_gup_flags], [
#include <linux/mm.h>
], [
struct task_struct *tsk = NULL;
struct mm_struct *mm = NULL;
unsigned long start = 0;
unsigned long nr_pages = 1;
struct page **pages = NULL;
unsigned int gup_flags = 0;
long ret __attribute__ ((unused));

ret = get_user_pages_unlocked(tsk, mm, start, nr_pages,
pages, gup_flags);
])

dnl #
dnl # Probe: get_user_pages(tsk, mm, start, nr_pages, write, force,
dnl # pages, vmas). Note this snippet stores the result in an int,
dnl # whereas the get_user_pages_unlocked() snippets use long.
dnl #
ZFS_LINUX_TEST_SRC([get_user_pages_task_struct], [
#include <linux/mm.h>
], [
struct task_struct *tsk = NULL;
struct mm_struct *mm = NULL;
struct vm_area_struct **vmas = NULL;
unsigned long start = 0;
unsigned long nr_pages = 1;
int write = 0;
int force = 0;
struct page **pages = NULL;
int ret __attribute__ ((unused));

ret = get_user_pages(tsk, mm, start, nr_pages, write,
force, pages, vmas);
])
])

dnl #
dnl # Supported get_user_pages/_unlocked interfaces checked newest to oldest.
bwatkinson marked this conversation as resolved.
Show resolved Hide resolved
dnl # We first check for get_user_pages_unlocked as that is available in
dnl # newer kernels.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_GET_USER_PAGES], [
	dnl #
	dnl # Evaluates the get_user_pages probes as a nested fallback chain:
	dnl # the "no" branch of each ZFS_LINUX_TEST_RESULT runs the next
	dnl # probe. The first probe that succeeds defines one
	dnl # HAVE_GET_USER_PAGES_* macro via AC_DEFINE. If every probe
	dnl # fails, ZFS_LINUX_TEST_ERROR([Direct I/O]) is invoked.
	dnl #

	dnl #
	dnl # Current API (as of 4.9) of get_user_pages_unlocked
	dnl #
	AC_MSG_CHECKING([whether get_user_pages_unlocked() takes gup flags])
	ZFS_LINUX_TEST_RESULT([get_user_pages_unlocked_gup_flags], [
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_GET_USER_PAGES_UNLOCKED_GUP_FLAGS, 1,
		    [get_user_pages_unlocked() takes gup flags])
	], [
		AC_MSG_RESULT(no)

		dnl #
		dnl # 4.8 API change, get_user_pages_unlocked
		dnl #
		AC_MSG_CHECKING(
		    [whether get_user_pages_unlocked() takes write flag])
		ZFS_LINUX_TEST_RESULT([get_user_pages_unlocked_write_flag], [
			AC_MSG_RESULT(yes)
			AC_DEFINE(HAVE_GET_USER_PAGES_UNLOCKED_WRITE_FLAG, 1,
			    [get_user_pages_unlocked() takes write flag])
		], [
			AC_MSG_RESULT(no)

			dnl #
			dnl # 4.0-4.3, 4.5-4.7 API, get_user_pages_unlocked
			dnl #
			AC_MSG_CHECKING(
			    [whether get_user_pages_unlocked() takes task_struct])
			ZFS_LINUX_TEST_RESULT(
			    [get_user_pages_unlocked_task_struct], [
				AC_MSG_RESULT(yes)
				AC_DEFINE(
				    HAVE_GET_USER_PAGES_UNLOCKED_TASK_STRUCT, 1,
				    [get_user_pages_unlocked() takes task_struct])
			], [
				AC_MSG_RESULT(no)

				dnl #
				dnl # 4.4 API, get_user_pages_unlocked
				dnl #
				AC_MSG_CHECKING(
				    [whether get_user_pages_unlocked() takes task_struct, gup_flags])
				ZFS_LINUX_TEST_RESULT(
				    [get_user_pages_unlocked_task_struct_gup_flags], [
					AC_MSG_RESULT(yes)
					AC_DEFINE(
					    HAVE_GET_USER_PAGES_UNLOCKED_TASK_STRUCT_GUP_FLAGS, 1,
					    [get_user_pages_unlocked() takes task_struct, gup_flags])
				], [
					AC_MSG_RESULT(no)

					dnl #
					dnl # get_user_pages
					dnl #
					AC_MSG_CHECKING(
					    [whether get_user_pages() takes struct task_struct])
					ZFS_LINUX_TEST_RESULT(
					    [get_user_pages_task_struct], [
						AC_MSG_RESULT(yes)
						AC_DEFINE(
						    HAVE_GET_USER_PAGES_TASK_STRUCT, 1,
						    [get_user_pages() takes task_struct])
					], [
						dnl #
						dnl # Finish the pending
						dnl # AC_MSG_CHECKING line so the
						dnl # configure output is not left
						dnl # dangling before the error.
						dnl #
						AC_MSG_RESULT(no)

						dnl #
						dnl # If we cannot map the user's
						dnl # pages in then we cannot do
						dnl # Direct I/O
						dnl #
						ZFS_LINUX_TEST_ERROR([Direct I/O])
					])
				])
			])
		])
	])
])
4 changes: 2 additions & 2 deletions config/kernel-vfs-direct_IO.m4
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
dnl #
dnl # Check for direct IO interfaces.
dnl # Check for Direct I/O interfaces.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO], [
ZFS_LINUX_TEST_SRC([direct_io_iter], [
Expand Down Expand Up @@ -100,7 +100,7 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO], [
AC_DEFINE(HAVE_VFS_DIRECT_IO_IOVEC, 1,
[aops->direct_IO() uses iovec])
],[
ZFS_LINUX_TEST_ERROR([direct IO])
ZFS_LINUX_TEST_ERROR([Direct I/O])
AC_MSG_RESULT([no])
])
])
Expand Down
Loading
Loading