Skip to content

Commit

Permalink
WIP: handle zfs_mknode issues with RENAME_WHITEOUT
Browse files Browse the repository at this point in the history
Signed-off-by: Aleksa Sarai <[email protected]>
  • Loading branch information
cyphar authored and Ryan Moeller committed Oct 19, 2022
1 parent 521bf71 commit 0abd108
Show file tree
Hide file tree
Showing 11 changed files with 187 additions and 37 deletions.
2 changes: 1 addition & 1 deletion include/os/freebsd/zfs/sys/zfs_vnops_os.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ extern int zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd,
cred_t *cr, int flags);
extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr);
extern int zfs_rename(znode_t *sdzp, const char *snm, znode_t *tdzp,
const char *tnm, cred_t *cr, int flags, uint64_t rflags);
const char *tnm, cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap);
extern int zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
const char *link, znode_t **zpp, cred_t *cr, int flags);
extern int zfs_link(znode_t *tdzp, znode_t *sp,
Expand Down
10 changes: 10 additions & 0 deletions include/os/linux/spl/sys/sysmacros.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,16 @@ extern uint32_t zone_get_hostid(void *zone);
extern void spl_setup(void);
extern void spl_cleanup(void);

/*
 * Kernel-side stand-in for libc's makedev(); the kernel module has no libc
 * to take it from, so it is defined inline here. The major number is
 * truncated to its low 12 bits (0-4095) and the minor number to its low
 * 8 bits (0-255) before the two are packed into a single value.
 */
static inline dev_t
makedev(unsigned int major, unsigned int minor)
{
	const unsigned int major_bits = (major & 0xFFF) << 8;
	const unsigned int minor_bits = minor & 0xFF;

	return (major_bits | minor_bits);
}

#define highbit(x) __fls(x)
#define lowbit(x) __ffs(x)

Expand Down
2 changes: 1 addition & 1 deletion include/os/linux/zfs/sys/zfs_vnops_os.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ extern int zfs_getattr_fast(struct user_namespace *, struct inode *ip,
struct kstat *sp);
extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr);
extern int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp,
char *tnm, cred_t *cr, int flags, uint64_t rflags);
char *tnm, cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap);
extern int zfs_symlink(znode_t *dzp, char *name, vattr_t *vap,
char *link, znode_t **zpp, cred_t *cr, int flags);
extern int zfs_readlink(struct inode *ip, zfs_uio_t *uio, cred_t *cr);
Expand Down
6 changes: 3 additions & 3 deletions include/sys/zfs_znode.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,12 +275,12 @@ extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname,
znode_t *szp);
extern void zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx,
uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp,
const char *dname, znode_t *szp);
extern void zfs_log_rename_exchange(zilog_t *zilog, dmu_tx_t *tx,
uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp,
const char *dname, znode_t *szp);
extern void zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx,
uint64_t txtype, znode_t *sdzp, const char *sname, znode_t *tdzp,
const char *dname, znode_t *szp, znode_t *wzp);
extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, offset_t off, ssize_t len, int ioflag,
zil_callback_t callback, void *callback_data);
Expand Down
13 changes: 13 additions & 0 deletions include/sys/zil.h
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,19 @@ typedef struct {
/* 2 strings: names of source and destination follow this */
} lr_rename_t;

/*
 * Log record for TX_RENAME_WHITEOUT: a regular rename record followed by the
 * attributes of the whiteout node that the rename creates, so that replay
 * has what it needs to create that node again.
 */
typedef struct {
lr_rename_t lr_rename; /* common rename portion */
/* members related to the whiteout file (based on lr_create_t) */
uint64_t lr_wfoid; /* obj id of the new whiteout file (+ dnode slots) */
uint64_t lr_wmode; /* mode of the whiteout object */
uint64_t lr_wuid; /* uid of whiteout */
uint64_t lr_wgid; /* gid of whiteout */
uint64_t lr_wgen; /* generation (txg of creation) */
uint64_t lr_wcrtime[2]; /* creation time */
uint64_t lr_wrdev; /* always makedev(0, 0) */
/* 2 strings: names of source and destination follow this */
} lr_rename_whiteout_t;

typedef struct {
lr_t lr_common; /* common portion of log record */
uint64_t lr_foid; /* file object to write */
Expand Down
4 changes: 2 additions & 2 deletions module/os/freebsd/zfs/zfs_vnops_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -3473,15 +3473,15 @@ zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,

int
zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname,
cred_t *cr, int flags, uint64_t rflags)
cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap)
{
struct componentname scn, tcn;
vnode_t *sdvp, *tdvp;
vnode_t *svp, *tvp;
int error;
svp = tvp = NULL;

if (rflags != 0)
if (rflags != 0 || wo_vap != NULL)
return (SET_ERROR(EINVAL));

sdvp = ZTOV(sdzp);
Expand Down
23 changes: 14 additions & 9 deletions module/os/linux/zfs/zfs_vnops_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -2769,6 +2769,7 @@ zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp)
* cr - credentials of caller.
* flags - case flags
* rflags - RENAME_* flags
* wo_vap - attributes for RENAME_WHITEOUT (must be a char 0:0).
*
* RETURN: 0 on success, error code on failure.
*
Expand All @@ -2778,7 +2779,7 @@ zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp)
/*ARGSUSED*/
int
zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
cred_t *cr, int flags, uint64_t rflags)
cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap)
{
znode_t *szp, *tzp;
zfsvfs_t *zfsvfs = ZTOZSB(sdzp);
Expand All @@ -2792,7 +2793,6 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
int zflg = 0;
boolean_t waited = B_FALSE;
/* Needed for whiteout inode creation. */
vattr_t wo_vap;
boolean_t fuid_dirtied;
zfs_acl_ids_t acl_ids;
boolean_t have_acl = B_FALSE;
Expand All @@ -2810,6 +2810,15 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
(rflags & (RENAME_NOREPLACE | RENAME_WHITEOUT)))
return (SET_ERROR(EINVAL));

/*
* Make sure we only get wo_vap iff. RENAME_WHITEOUT and that it's the
* right kind of vattr_t for the whiteout file. These are set
* internally by ZFS so should never be incorrect.
*/
VERIFY_EQUIV(rflags & RENAME_WHITEOUT, wo_vap != NULL);
VERIFY_IMPLY(wo_vap, wo_vap->va_mode == S_IFCHR);
VERIFY_IMPLY(wo_vap, wo_vap->va_rdev == makedevice(0, 0));

ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(sdzp);
ZFS_VERIFY_ZP(tdzp);
Expand Down Expand Up @@ -3056,12 +3065,8 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
if (error)
goto out;

zpl_vap_init(&wo_vap, ZTOI(sdzp), S_IFCHR, cr);
/* Can't use of makedevice() here, so hard-code it. */
wo_vap.va_rdev = 0;

if (!have_acl) {
error = zfs_acl_ids_create(sdzp, 0, &wo_vap, cr, NULL,
error = zfs_acl_ids_create(sdzp, 0, wo_vap, cr, NULL,
&acl_ids);
if (error)
goto out;
Expand Down Expand Up @@ -3213,7 +3218,7 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
goto commit_unlink_td_szp;
break;
case RENAME_WHITEOUT:
zfs_mknode(sdzp, &wo_vap, tx, cr, 0, &wzp, &acl_ids);
zfs_mknode(sdzp, wo_vap, tx, cr, 0, &wzp, &acl_ids);
error = zfs_link_create(sdl, wzp, tx, ZNEW);
if (error) {
zfs_znode_delete(wzp, tx);
Expand All @@ -3235,7 +3240,7 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
case RENAME_WHITEOUT:
zfs_log_rename_whiteout(zilog, tx,
(flags & FIGNORECASE ? TX_CI : 0), sdzp, sdl->dl_name,
tdzp, tdl->dl_name, szp);
tdzp, tdl->dl_name, szp, wzp);
break;
default:
ASSERT0(rflags & ~RENAME_NOREPLACE);
Expand Down
12 changes: 11 additions & 1 deletion module/os/linux/zfs/zpl_inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
*/


#include <sys/sysmacros.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_vfsops.h>
#include <sys/zfs_vnops.h>
Expand Down Expand Up @@ -481,14 +482,23 @@ zpl_rename2(struct inode *sdip, struct dentry *sdentry,
#endif
{
cred_t *cr = CRED();
vattr_t *wo_vap = NULL;
int error;
fstrans_cookie_t cookie;

crhold(cr);
if (rflags & RENAME_WHITEOUT) {
wo_vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
zpl_vap_init(wo_vap, sdip, S_IFCHR, cr);
wo_vap->va_rdev = makedevice(0, 0);
}

cookie = spl_fstrans_mark();
error = -zfs_rename(ITOZ(sdip), dname(sdentry), ITOZ(tdip),
dname(tdentry), cr, 0, rflags);
dname(tdentry), cr, 0, rflags, wo_vap);
spl_fstrans_unmark(cookie);
if (wo_vap)
kmem_free(wo_vap, sizeof (vattr_t));
crfree(cr);
ASSERT3S(error, <=, 0);

Expand Down
47 changes: 45 additions & 2 deletions module/zfs/zfs_log.c
Original file line number Diff line number Diff line change
Expand Up @@ -550,20 +550,63 @@ zfs_log_rename_exchange(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,

/*
* Handles TX_RENAME_WHITEOUT transactions.
*
* Unfortunately we cannot reuse do_zfs_log_rename because we need to call
* zfs_mknode() on replay which requires stashing bits as with TX_CREATE.
*
* wzp is the whiteout znode created by the rename; its object id, dnode slot
* count, generation, creation time, mode, owner and rdev are copied into the
* record. Nothing is logged while the ZIL itself is being replayed.
*/
void
zfs_log_rename_whiteout(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
znode_t *sdzp, const char *sname, znode_t *tdzp, const char *dname,
znode_t *szp)
znode_t *szp, znode_t *wzp)
{
itx_t *itx;
spa_t *spa = zilog->zl_spa;
lr_rename_whiteout_t *lr;
/* Both sizes include the terminating NUL. */
size_t snamesize = strlen(sname) + 1;
size_t dnamesize = strlen(dname) + 1;

/* zfs_rename must have already activated this feature. */
VERIFY(spa_feature_is_enabled(spa, SPA_FEATURE_RENAME_WHITEOUT));
VERIFY(spa_feature_is_active(spa, SPA_FEATURE_RENAME_WHITEOUT));

if (zil_replaying(zilog, tx))
return;

txtype |= TX_RENAME_WHITEOUT;
do_zfs_log_rename(zilog, tx, txtype, sdzp, sname, tdzp, dname, szp);
/* The record is followed in the same itx by the two name strings. */
itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize);
lr = (lr_rename_whiteout_t *)&itx->itx_lr;
lr->lr_rename.lr_sdoid = sdzp->z_id;
lr->lr_rename.lr_tdoid = tdzp->z_id;

/*
* RENAME_WHITEOUT will create an entry at the source znode, so we need
* to store the same data that the equivalent call to zfs_log_create()
* would.
*/
lr->lr_wfoid = wzp->z_id;
LR_FOID_SET_SLOTS(lr->lr_wfoid, wzp->z_dnodesize >> DNODE_SHIFT);
/*
* NOTE(review): the sa_lookup() results are discarded; if a lookup can
* fail here the field keeps whatever zil_itx_create() left in it --
* confirm that is acceptable.
*/
(void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(wzp)), &lr->lr_wgen,
sizeof (uint64_t));
(void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(wzp)),
lr->lr_wcrtime, sizeof (uint64_t) * 2);
lr->lr_wmode = wzp->z_mode;
lr->lr_wuid = (uint64_t)KUID_TO_SUID(ZTOUID(wzp));
lr->lr_wgid = (uint64_t)KGID_TO_SGID(ZTOGID(wzp));

/*
* This rdev will always be makedevice(0, 0) but because the ZIL log and
* replay code needs to be platform independent (and there is no
* platform independent makedev()) we need to copy the one created
* during the rename operation.
*/
(void) sa_lookup(wzp->z_sa_hdl, SA_ZPL_RDEV(ZTOZSB(wzp)), &lr->lr_wrdev,
sizeof (lr->lr_wrdev));

/* Source name first, destination name immediately after it. */
memcpy((char *)(lr + 1), sname, snamesize);
memcpy((char *)(lr + 1) + snamesize, dname, dnamesize);
itx->itx_oid = szp->z_id;

zil_itx_assign(zilog, itx, tx);
}

/*
Expand Down
91 changes: 76 additions & 15 deletions module/zfs/zfs_replay.c
Original file line number Diff line number Diff line change
Expand Up @@ -639,29 +639,25 @@ zfs_replay_link(void *arg1, void *arg2, boolean_t byteswap)
}

static int
do_zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap,
uint64_t rflags)
do_zfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, char *sname,
char *tname, uint64_t rflags, vattr_t *wo_vap)
{
zfsvfs_t *zfsvfs = arg1;
lr_rename_t *lr = arg2;
char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
char *tname = sname + strlen(sname) + 1;
znode_t *sdzp, *tdzp;
int error, vflg = 0;

if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));

/* Only Linux currently supports RENAME_* flags. */
#ifdef __linux__
VERIFY0(rflags & ~(RENAME_EXCHANGE | RENAME_WHITEOUT));

IMPLY(rflags & RENAME_EXCHANGE,
VERIFY_IMPLY(rflags & RENAME_EXCHANGE,
spa_feature_is_active(zfsvfs->z_os->os_spa,
SPA_FEATURE_RENAME_EXCHANGE));
IMPLY(rflags & RENAME_WHITEOUT,
VERIFY_IMPLY(rflags & RENAME_WHITEOUT,
spa_feature_is_active(zfsvfs->z_os->os_spa,
SPA_FEATURE_RENAME_WHITEOUT));

/* wo_vap must be non-NULL iff. we're doing RENAME_WHITEOUT */
VERIFY_EQUIV(rflags & RENAME_WHITEOUT, wo_vap != NULL);
#else
VERIFY0(rflags);
#endif
Expand All @@ -677,7 +673,8 @@ do_zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap,
if (lr->lr_common.lrc_txtype & TX_CI)
vflg |= FIGNORECASE;

error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg, rflags);
error = zfs_rename(sdzp, sname, tdzp, tname, kcred, vflg, rflags,
wo_vap);

zrele(tdzp);
zrele(sdzp);
Expand All @@ -687,15 +684,37 @@ do_zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap,
/*
 * Replay a plain rename record: the shared helper is called with no RENAME_*
 * flags and no whiteout attributes.
 */
static int
zfs_replay_rename(void *arg1, void *arg2, boolean_t byteswap)
{
return (do_zfs_replay_rename(arg1, arg2, byteswap, 0));
zfsvfs_t *zfsvfs = arg1;
lr_rename_t *lr = arg2;
char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
char *tname = sname + strlen(sname) + 1;

/* Only the fixed-size record is swapped, not the trailing names. */
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));

return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, 0, NULL));
}

/*
 * Replay a TX_RENAME_EXCHANGE record. On Linux this forwards to the shared
 * helper with RENAME_EXCHANGE and no whiteout attributes; elsewhere it
 * panics.
 */
static int
zfs_replay_rename_exchange(void *arg1, void *arg2, boolean_t byteswap)
{
#ifdef __linux__
return (do_zfs_replay_rename(arg1, arg2, byteswap, RENAME_EXCHANGE));
zfsvfs_t *zfsvfs = arg1;
lr_rename_t *lr = arg2;
char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
char *tname = sname + strlen(sname) + 1;

/* Only the fixed-size record is swapped, not the trailing names. */
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));

return (do_zfs_replay_rename(zfsvfs, lr, sname, tname, RENAME_EXCHANGE,
NULL));
#else
/*
* We should never reach this point because the feature is not
* supported by non-Linux versions of OpenZFS.
*/
PANIC("TX_RENAME_EXCHANGE cannot be replayed on non-Linux systems.");
/* NOTE(review): presumably unreachable after PANIC() -- confirm. */
return (SET_ERROR(ENOTSUP));
#endif
}
Expand All @@ -704,8 +723,50 @@ static int
zfs_replay_rename_whiteout(void *arg1, void *arg2, boolean_t byteswap)
{
/*
* Replay a TX_RENAME_WHITEOUT record: rebuild the whiteout's attributes
* from the record, claim its object id, then hand off to the shared
* rename replay helper. Non-Linux builds panic instead.
*/
#ifdef __linux__
return (do_zfs_replay_rename(arg1, arg2, byteswap, RENAME_WHITEOUT));
zfsvfs_t *zfsvfs = arg1;
lr_rename_whiteout_t *lr = arg2;
int error;
/* sname and tname follow lr_rename_whiteout_t */
char *sname = (char *)(lr + 1);
char *tname = sname + strlen(sname) + 1;
/* For the whiteout file. */
xvattr_t xva;
uint64_t objid;
uint64_t dnodesize;

/* Only the fixed-size record is swapped, not the trailing names. */
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));

/* lr_wfoid carries both the object id and the dnode slot count. */
objid = LR_FOID_GET_OBJ(lr->lr_wfoid);
dnodesize = LR_FOID_GET_SLOTS(lr->lr_wfoid) << DNODE_SHIFT;

/*
* NOTE(review): zfs_rename() VERIFYs that va_mode == S_IFCHR and that
* va_rdev == makedevice(0, 0). lr_wmode was logged from the whiteout
* znode's z_mode -- confirm it never carries extra mode bits, otherwise
* replay would trip that VERIFY.
*/
xva_init(&xva);
zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID,
lr->lr_wmode, lr->lr_wuid, lr->lr_wgid, lr->lr_wrdev, objid);

/*
* As with TX_CREATE, RENAME_WHITEOUT ends up in zfs_mknode(), which
* assigns the object's creation time, generation number, and dnode
* slot count. The generic zfs_rename() has no concept of these
* attributes, so we smuggle the values inside the vattr's otherwise
* unused va_ctime, va_nblocks, and va_fsid fields.
*/
ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_wcrtime);
xva.xva_vattr.va_nblocks = lr->lr_wgen;
xva.xva_vattr.va_fsid = dnodesize;

/* If the claim fails, its error is returned and no rename is tried. */
error = dnode_try_claim(zfsvfs->z_os, objid, dnodesize >> DNODE_SHIFT);
if (error)
return (error);

/* Only the plain vattr embedded in xva is passed down. */
return (do_zfs_replay_rename(zfsvfs, &lr->lr_rename, sname, tname,
RENAME_WHITEOUT, &xva.xva_vattr));
#else
/*
* We should never reach this point because the feature is not
* supported by non-Linux versions of OpenZFS.
*/
PANIC("TX_RENAME_WHITEOUT cannot be replayed on non-Linux systems.");
/* NOTE(review): presumably unreachable after PANIC() -- confirm. */
return (SET_ERROR(ENOTSUP));
#endif
}
Expand Down
Loading

0 comments on commit 0abd108

Please sign in to comment.