diff --git a/include/sys/abd_impl.h b/include/sys/abd_impl.h index 35a64f8621a5..77dc50ec109b 100644 --- a/include/sys/abd_impl.h +++ b/include/sys/abd_impl.h @@ -104,6 +104,7 @@ void abd_free_linear_page(abd_t *); void abd_iter_init(struct abd_iter *, abd_t *); boolean_t abd_iter_at_end(struct abd_iter *); void abd_iter_advance(struct abd_iter *, size_t); +size_t abd_iter_size(struct abd_iter *aiter); void abd_iter_map(struct abd_iter *); void abd_iter_unmap(struct abd_iter *); void abd_iter_page(struct abd_iter *); @@ -120,6 +121,187 @@ void abd_iter_page(struct abd_iter *); #define ABD_LINEAR_BUF(abd) ((abd)->abd_u.abd_linear.abd_buf) #define ABD_GANG(abd) ((abd)->abd_u.abd_gang) +/* + * Chunk iterators. + * + * This is a new type of ABD iterator that iterates over data chunks. The idea + * is that since ABDs are effectively a wrapper over a set of memory regions, + * an iterator that yields data chunks can be smaller and simpler. + * + * The iterator object abd_chunk_t can be thought of as a pointer to a chunk + * within an array of chunks. There are three main functions involved with + * setting up and using an iterator: + * + * - abd_chunk_t ch = abd_chunk_start(abd_t *abd, size_t off, size_t size) + * + * Create a new iterator over the given ABD, starting at off bytes, for size + * bytes. If off and size would fall outside of the ABD, the returned + * iterator will be unusable (already "done"). + * + * - void abd_chunk_advance(abd_chunk_t *ch) + * + * Move the iterator to the next chunk. If there is no next chunk, the + * iterator is changed to the "done" state and is no longer useable. + * + * - boolean_t abd_chunk_done(abd_chunk_t *ch) + * + * If true, the iterator is pointing to a valid chunk, and the underlying + * memory can be accessed with the access functions. If false, the iterator + * is exhausted and no longer useable. 
+ * + * Together, these allow an ABD to be processed in a for loop: + * + * for (abd_chunk_t ch = abd_chunk_start(abd, off, size); + * !abd_chunk_done(&ch); abd_chunk_advance(&ch)) + * + * With a valid chunk iterator, the following functions can be used to work + * with the underlying data or memory: + * + * - size_t abd_chunk_size(abd_chunk_t *ch) + * + * The number of data bytes within the chunk. + * + * - void *data = abd_chunk_map(abd_chunk_t *ch) + * + * Map the memory within the chunk into the address space, and return a + * pointer to the start of the data. + * + * - void abd_chunk_unmap(abd_chunk_t *ch) + * + * Unmap previously-mapped chunk memory. For convenience, if there is nothing + * mapped, nothing happens. + */ + +/* XXX temp exposing old iterator control functions for use in chunk iters */ +abd_t *abd_init_abd_iter(abd_t *abd, struct abd_iter *aiter, size_t off); +abd_t *abd_advance_abd_iter(abd_t *abd, abd_t *cabd, struct abd_iter *aiter, + size_t len); + +typedef struct { + abd_t *ch_abd; /* ABD being iterated over */ + + size_t ch_coff; /* chunk offset within ABD */ + size_t ch_csize; /* size of chunk within ABD */ + size_t ch_doff; /* data offset within chunk */ + size_t ch_dsize; /* size of data remaining in iter */ + + struct abd_iter ch_iter; /* XXX old-style iterator */ + abd_t *ch_cabd; /* XXX child abd, for gang iter */ +} abd_chunk_t; + +static inline abd_chunk_t +abd_chunk_start(abd_t *abd, size_t off, size_t size) +{ + abd_chunk_t ch = { + .ch_abd = abd, + }; + + if (size == 0 || (off + size > abd_get_size(abd))) { + ch.ch_dsize = 0; + ch.ch_doff = 0; + return (ch); + } + + abd_verify(abd); + ASSERT3U(off + size, <=, abd->abd_size); + + /* start of data, size of data */ + ch.ch_doff = off; + ch.ch_dsize = size; + + ch.ch_cabd = abd_init_abd_iter(abd, &ch.ch_iter, 0); + + /* size of first chunk */ + ch.ch_coff = 0; + ch.ch_csize = abd_iter_size(&ch.ch_iter); + + /* roll chunks forward until we reach the one with the data start */ + while 
(ch.ch_doff >= ch.ch_csize) { + ch.ch_doff -= ch.ch_csize; + ch.ch_coff += ch.ch_csize; + + ch.ch_cabd = abd_advance_abd_iter(ch.ch_abd, ch.ch_cabd, + &ch.ch_iter, ch.ch_csize); + + ch.ch_csize = abd_iter_size(&ch.ch_iter); + } + + return (ch); +} + +static inline void +abd_chunk_advance(abd_chunk_t *ch) +{ + ASSERT3U(ch->ch_dsize, >, 0); + + /* consume data up to the end of the chunk */ + ch->ch_dsize -= MIN(ch->ch_dsize, ch->ch_csize - ch->ch_doff); + + /* next data will be at the start of the next chunk */ + ch->ch_doff = 0; + + /* no more data, so return */ + if (ch->ch_dsize == 0) + return; + + ch->ch_cabd = abd_advance_abd_iter(ch->ch_abd, ch->ch_cabd, + &ch->ch_iter, ch->ch_csize); + + ch->ch_coff += ch->ch_csize; + ch->ch_csize = abd_iter_size(&ch->ch_iter); +} + +static inline boolean_t +abd_chunk_done(abd_chunk_t *ch) +{ + return (ch->ch_dsize == 0); +} + +static inline size_t +abd_chunk_size(abd_chunk_t *ch) +{ + return (MIN(ch->ch_dsize, ch->ch_csize - ch->ch_doff)); +} + +static inline void * +abd_chunk_map(abd_chunk_t *ch) { + abd_iter_map(&ch->ch_iter); + ASSERT3U(ch->ch_iter.iter_mapsize, ==, ch->ch_csize); + return (ch->ch_iter.iter_mapaddr + ch->ch_doff); +} + +static inline void +abd_chunk_unmap(abd_chunk_t *ch) { + if (ch->ch_iter.iter_mapaddr == NULL) + return; + abd_iter_unmap(&ch->ch_iter); +} + +/* + * Macro to iterate over an ABD in the most common case, where you want to + * do some operation on the actual data. Safe to break out early. + * + * data and dsize are the names of the buffer and size variables to use inside + * the loop. 
+ * + * abd_for_each_chunk(abd, off, size, data, dsize, { + * // do work on data and dsize + * }); + */ +#define abd_for_each_chunk(abd, off, size, __data, __dsize, __code) \ + do { \ + void *__data; \ + size_t __dsize; \ + abd_chunk_t __ch = abd_chunk_start(abd, off, size); \ + for (; !abd_chunk_done(&__ch) && \ + (__data = abd_chunk_map(&__ch)) && \ + (__dsize = abd_chunk_size(&__ch)); \ + abd_chunk_unmap(&__ch), abd_chunk_advance(&__ch)) { \ + __code; \ + } \ + abd_chunk_unmap(&__ch); \ + } while (0) + #ifdef __cplusplus } #endif diff --git a/lib/libzpool/abd_os.c b/lib/libzpool/abd_os.c index 8531b8f40ace..28f3d7bebcc6 100644 --- a/lib/libzpool/abd_os.c +++ b/lib/libzpool/abd_os.c @@ -311,6 +311,38 @@ abd_iter_advance(struct abd_iter *aiter, size_t amount) ASSERT3U(aiter->iter_pos, <=, aiter->iter_abd->abd_size); } +/* Data in current linear chunk is always just distance to end of ABD */ +#define ABD_ITER_LINEAR_SIZE(aiter) \ + ((aiter)->iter_abd->abd_size - (aiter)->iter_pos) + +/* + * Data in current scatter chunk is always distance to end of chunk, or in + * the last chunk, distance to end of ABD + */ +#define ABD_ITER_SCATTER_SIZE(aiter) \ + (MIN(ABD_PAGESIZE - (((aiter)->iter_pos + \ + ABD_SCATTER((aiter)->iter_abd).abd_offset) & ABD_PAGEMASK), \ + (aiter)->iter_abd->abd_size - (aiter)->iter_pos)) \ + +/* + * Return size of data in current chunk (_not_ underlying memory size). + */ +size_t +abd_iter_size(struct abd_iter *aiter) +{ + /* If it's already mapped, just return that size. */ + if (aiter->iter_mapsize > 0) + return (aiter->iter_mapsize); + + /* There's no current chunk if the iterator is exhausted. 
*/ + if (abd_iter_at_end(aiter)) + return (0); + + if (abd_is_linear(aiter->iter_abd)) + return (ABD_ITER_LINEAR_SIZE(aiter)); + return (ABD_ITER_SCATTER_SIZE(aiter)); +} + void abd_iter_map(struct abd_iter *aiter) { @@ -323,8 +355,7 @@ abd_iter_map(struct abd_iter *aiter) if (abd_is_linear(aiter->iter_abd)) { aiter->iter_mapaddr = ABD_LINEAR_BUF(aiter->iter_abd) + aiter->iter_pos; - aiter->iter_mapsize = - aiter->iter_abd->abd_size - aiter->iter_pos; + aiter->iter_mapsize = ABD_ITER_LINEAR_SIZE(aiter); return; } @@ -342,6 +373,7 @@ abd_iter_map(struct abd_iter *aiter) aiter->iter_mapsize = MIN(ABD_PAGESIZE - (poff & ABD_PAGEMASK), aiter->iter_abd->abd_size - aiter->iter_pos); ASSERT3U(aiter->iter_mapsize, <=, ABD_PAGESIZE); + ASSERT3U(aiter->iter_mapsize, ==, ABD_ITER_SCATTER_SIZE(aiter)); aiter->iter_mapaddr = iov->iov_base + (poff & ABD_PAGEMASK); } diff --git a/module/os/linux/zfs/abd_os.c b/module/os/linux/zfs/abd_os.c index 39ea3e62dba0..0e8e09a7581b 100644 --- a/module/os/linux/zfs/abd_os.c +++ b/module/os/linux/zfs/abd_os.c @@ -890,6 +890,36 @@ abd_iter_advance(struct abd_iter *aiter, size_t amount) } } +/* Data in current linear chunk is always just offset to end of ABD */ +#define ABD_ITER_LINEAR_SIZE(aiter) \ + ((aiter)->iter_abd->abd_size - (aiter)->iter_offset) + +/* + * Data in current scatter chunk is always distance to end of chunk, or in + * the last chunk, distance to end of ABD + */ +#define ABD_ITER_SCATTER_SIZE(aiter) \ + (MIN((aiter)->iter_sg->length - (aiter)->iter_offset, \ + (aiter)->iter_abd->abd_size - (aiter)->iter_pos)) +/* + * Return size of data in current chunk (_not_ underlying memory size). + */ +size_t +abd_iter_size(struct abd_iter *aiter) +{ + /* If it's already mapped, just return that size. */ + if (aiter->iter_mapsize > 0) + return (aiter->iter_mapsize); + + /* There's no current chunk if the iterator is exhausted. 
*/ + if (abd_iter_at_end(aiter)) + return (0); + + if (abd_is_linear(aiter->iter_abd)) + return (ABD_ITER_LINEAR_SIZE(aiter)); + return (ABD_ITER_SCATTER_SIZE(aiter)); +} + /* * Map the current chunk into aiter. This can be safely called when the aiter * has already exhausted, in which case this does nothing. @@ -898,7 +928,6 @@ void abd_iter_map(struct abd_iter *aiter) { void *paddr; - size_t offset = 0; ASSERT3P(aiter->iter_mapaddr, ==, NULL); ASSERT0(aiter->iter_mapsize); @@ -909,18 +938,14 @@ abd_iter_map(struct abd_iter *aiter) if (abd_is_linear(aiter->iter_abd)) { ASSERT3U(aiter->iter_pos, ==, aiter->iter_offset); - offset = aiter->iter_offset; - aiter->iter_mapsize = aiter->iter_abd->abd_size - offset; + aiter->iter_mapsize = ABD_ITER_LINEAR_SIZE(aiter); paddr = ABD_LINEAR_BUF(aiter->iter_abd); } else { - offset = aiter->iter_offset; - aiter->iter_mapsize = MIN(aiter->iter_sg->length - offset, - aiter->iter_abd->abd_size - aiter->iter_pos); - + aiter->iter_mapsize = ABD_ITER_SCATTER_SIZE(aiter); paddr = zfs_kmap_local(sg_page(aiter->iter_sg)); } - aiter->iter_mapaddr = (char *)paddr + offset; + aiter->iter_mapaddr = (char *)paddr + aiter->iter_offset; } /* diff --git a/module/zfs/abd.c b/module/zfs/abd.c index 529deeecfd4b..d2b44e776245 100644 --- a/module/zfs/abd.c +++ b/module/zfs/abd.c @@ -718,7 +718,7 @@ abd_take_ownership_of_buf(abd_t *abd, boolean_t is_metadata) * Initializes an abd_iter based on whether the abd is a gang ABD * or just a single ABD. */ -static inline abd_t * +abd_t * abd_init_abd_iter(abd_t *abd, struct abd_iter *aiter, size_t off) { abd_t *cabd = NULL; @@ -741,7 +741,7 @@ abd_init_abd_iter(abd_t *abd, struct abd_iter *aiter, size_t off) * advancing could mean that we are at the end of a particular ABD and * must grab the ABD in the gang ABD's list. 
*/ -static inline abd_t * +abd_t * abd_advance_abd_iter(abd_t *abd, abd_t *cabd, struct abd_iter *aiter, size_t len) { @@ -761,35 +761,13 @@ int abd_iterate_func(abd_t *abd, size_t off, size_t size, abd_iter_func_t *func, void *private) { - struct abd_iter aiter; int ret = 0; - if (size == 0) - return (0); - - abd_verify(abd); - ASSERT3U(off + size, <=, abd->abd_size); - - abd_t *c_abd = abd_init_abd_iter(abd, &aiter, off); - - while (size > 0) { - IMPLY(abd_is_gang(abd), c_abd != NULL); - - abd_iter_map(&aiter); - - size_t len = MIN(aiter.iter_mapsize, size); - ASSERT3U(len, >, 0); - - ret = func(aiter.iter_mapaddr, len, private); - - abd_iter_unmap(&aiter); - + abd_for_each_chunk(abd, off, size, data, dsize, { + ret = func(data, dsize, private); if (ret != 0) break; - - size -= len; - c_abd = abd_advance_abd_iter(abd, c_abd, &aiter, len); - } + }); return (ret); } @@ -836,43 +814,17 @@ abd_iterate_page_func(abd_t *abd, size_t off, size_t size, } #endif -struct buf_arg { - void *arg_buf; -}; - -static int -abd_copy_to_buf_off_cb(void *buf, size_t size, void *private) -{ - struct buf_arg *ba_ptr = private; - - (void) memcpy(ba_ptr->arg_buf, buf, size); - ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size; - - return (0); -} - /* * Copy abd to buf. (off is the offset in abd.) 
*/ void abd_copy_to_buf_off(void *buf, abd_t *abd, size_t off, size_t size) { - struct buf_arg ba_ptr = { buf }; - - (void) abd_iterate_func(abd, off, size, abd_copy_to_buf_off_cb, - &ba_ptr); -} - -static int -abd_cmp_buf_off_cb(void *buf, size_t size, void *private) -{ - int ret; - struct buf_arg *ba_ptr = private; - - ret = memcmp(buf, ba_ptr->arg_buf, size); - ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size; - - return (ret); + char *c = buf; + abd_for_each_chunk(abd, off, size, data, dsize, { + memcpy(c, data, dsize); + c += dsize; + }); } /* @@ -881,20 +833,17 @@ abd_cmp_buf_off_cb(void *buf, size_t size, void *private) int abd_cmp_buf_off(abd_t *abd, const void *buf, size_t off, size_t size) { - struct buf_arg ba_ptr = { (void *) buf }; - - return (abd_iterate_func(abd, off, size, abd_cmp_buf_off_cb, &ba_ptr)); -} - -static int -abd_copy_from_buf_off_cb(void *buf, size_t size, void *private) -{ - struct buf_arg *ba_ptr = private; + int ret = 0; + const char *c = buf; - (void) memcpy(buf, ba_ptr->arg_buf, size); - ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size; + abd_for_each_chunk(abd, off, size, data, dsize, { + ret = memcmp(c, data, dsize); + if (ret != 0) + break; + c += dsize; + }); - return (0); + return (ret); } /* @@ -903,18 +852,12 @@ abd_copy_from_buf_off_cb(void *buf, size_t size, void *private) void abd_copy_from_buf_off(abd_t *abd, const void *buf, size_t off, size_t size) { - struct buf_arg ba_ptr = { (void *) buf }; + const char *c = buf; - (void) abd_iterate_func(abd, off, size, abd_copy_from_buf_off_cb, - &ba_ptr); -} - -static int -abd_zero_off_cb(void *buf, size_t size, void *private) -{ - (void) private; - (void) memset(buf, 0, size); - return (0); + abd_for_each_chunk(abd, off, size, data, dsize, { + memcpy(data, c, dsize); + c += dsize; + }); } /* @@ -923,7 +866,9 @@ abd_zero_off_cb(void *buf, size_t size, void *private) void abd_zero_off(abd_t *abd, size_t off, size_t size) { - (void) abd_iterate_func(abd, off, size, abd_zero_off_cb, 
NULL); + abd_for_each_chunk(abd, off, size, data, dsize, { + memset(data, 0, dsize); + }); } /* @@ -936,49 +881,90 @@ abd_iterate_func2(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff, size_t size, abd_iter_func2_t *func, void *private) { int ret = 0; - struct abd_iter daiter, saiter; - abd_t *c_dabd, *c_sabd; if (size == 0) return (0); - abd_verify(dabd); - abd_verify(sabd); - - ASSERT3U(doff + size, <=, dabd->abd_size); - ASSERT3U(soff + size, <=, sabd->abd_size); + abd_chunk_t sch = abd_chunk_start(sabd, soff, size); + abd_chunk_t dch = abd_chunk_start(dabd, doff, size); - c_dabd = abd_init_abd_iter(dabd, &daiter, doff); - c_sabd = abd_init_abd_iter(sabd, &saiter, soff); + /* + * Current chunk is always mapped inside the loop, and remapped after + * advance, so we can leave a larger chunk mapped across iterations. + */ + void *saddr = abd_chunk_map(&sch); + size_t ssize = abd_chunk_size(&sch); - while (size > 0) { - IMPLY(abd_is_gang(dabd), c_dabd != NULL); - IMPLY(abd_is_gang(sabd), c_sabd != NULL); + void *daddr = abd_chunk_map(&dch); + size_t dsize = abd_chunk_size(&dch); - abd_iter_map(&daiter); - abd_iter_map(&saiter); + /* + * Offset to data in current mapping. If we only take less than the + * full chunk then we need to offset the next iteration. + */ + soff = doff = 0; - size_t dlen = MIN(daiter.iter_mapsize, size); - size_t slen = MIN(saiter.iter_mapsize, size); - size_t len = MIN(dlen, slen); - ASSERT(dlen > 0 || slen > 0); + while (size > 0) { + ASSERT3U(ssize, >, 0); + ASSERT3U(dsize, >, 0); - ret = func(daiter.iter_mapaddr, saiter.iter_mapaddr, len, - private); + /* + * This iteration we take the largest amount without going + * past the end of the chunk. 
+ */ + size_t isize = MIN(size, MIN(ssize, dsize)); - abd_iter_unmap(&saiter); - abd_iter_unmap(&daiter); + ret = func(daddr + doff, saddr + soff, isize, private); if (ret != 0) break; - size -= len; - c_dabd = - abd_advance_abd_iter(dabd, c_dabd, &daiter, len); - c_sabd = - abd_advance_abd_iter(sabd, c_sabd, &saiter, len); + /* + * If we've got all the data we want, eject now, rather than + * advance and remap below. This makes the while() condition + * above redundant. + */ + size -= isize; + if (size == 0) + break; + + /* + * If we consumed all of the source chunk, advance and remap. + * Otherwise, record the offset for the next iteration. + * + * Note the assertion on abd_chunk_done(). Because of the size + * check and break above, we need advance out of the last + * chunk, so the iterator will never be completed. + */ + ssize -= isize; + if (ssize == 0) { + abd_chunk_unmap(&sch); + abd_chunk_advance(&sch); + ASSERT(!abd_chunk_done(&sch)); + saddr = abd_chunk_map(&sch); + ssize = abd_chunk_size(&sch); + soff = 0; + } else + soff += isize; + + /* + * Same for the dest chunk. + */ + dsize -= isize; + if (dsize == 0) { + abd_chunk_unmap(&dch); + abd_chunk_advance(&dch); + ASSERT(!abd_chunk_done(&dch)); + daddr = abd_chunk_map(&dch); + dsize = abd_chunk_size(&dch); + doff = 0; + } else + doff += isize; } + abd_chunk_unmap(&sch); + abd_chunk_unmap(&dch); + return (ret); } @@ -1021,26 +1007,28 @@ abd_cmp(abd_t *dabd, abd_t *sabd) /* * Check if ABD content is all-zeroes. */ -static int -abd_cmp_zero_off_cb(void *data, size_t len, void *private) +int +abd_cmp_zero_off(abd_t *abd, size_t off, size_t size) { - (void) private; + int ret = 0; - /* This function can only check whole uint64s. Enforce that. */ - ASSERT0(P2PHASE(len, 8)); + abd_for_each_chunk(abd, off, size, data, dsize, { + /* This function can only check whole uint64s. Enforce that. 
*/ + ASSERT0(P2PHASE(dsize, 8)); - uint64_t *end = (uint64_t *)((char *)data + len); - for (uint64_t *word = (uint64_t *)data; word < end; word++) - if (*word != 0) - return (1); + uint64_t *end = (uint64_t *)((char *)data + dsize); + for (uint64_t *word = data; word < end; word++) { + if (*word != 0) { + ret = 1; + break; + } + } - return (0); -} + if (ret != 0) + break; + }); -int -abd_cmp_zero_off(abd_t *abd, size_t off, size_t size) -{ - return (abd_iterate_func(abd, off, size, abd_cmp_zero_off_cb, NULL)); + return (ret); } /*