From 44c33291c78d10012294ec197af3aebb27c6788f Mon Sep 17 00:00:00 2001 From: Waldemar Kozaczuk Date: Mon, 8 Jul 2024 12:00:17 -0400 Subject: [PATCH] make it thread-safe This patch modifies this fork of lwext4 to make is safe to interact with by multiple threads running in OSv. The key assumption is, that OSv VFS layer provides necessary locking around all interactions with lwext4 to guard ext filesystem metadata (i-node table, directory entries, etc) modifications confined to specific vnode. Beyond that, we add necessary locking around 3 key common data structures: - i-node bitmaps in ext4_ialloc.c - data block bitmaps in ext4_balloc.c - metadata blockcache in ext4_bcache.c and related files More specifically following functions are protected with inode_alloc_lock()/unlock() to make sure no two files/directories get assigned same inode number: - ext4_ialloc_alloc_inode() - ext4_ialloc_free_inode() Next, following functions are protected with block_alloc_lock()/unlock() to make sure no two files/directories use same data block: - ext4_balloc_alloc_block() - ext4_balloc_free_block() - ext4_balloc_free_blocks() Finally, these functions in ext4_bcache.c and related source files are protected with bcache_lock()/unlock() to make sure the global metadata block cache access is synchronized: - ext4_bcache_invalidate_lba() in __ext4_balloc_free_block() and __ext4_balloc_free_blocks() - ext4_bcache_find_get(), ext4_block_flush_buf() and ext4_bcache_free() in ext4_block_flush_lba() - ext4_block_get_noread(), ext4_bcache_test_flag() ext4_bcache_free() in ext4_block_get() - ext4_bcache_free() in ext4_block_set() - ext4_block_get_noread() in ext4_trans_block_get_noread() Ref https://github.com/gkostka/lwext4/issues/83 Ref https://github.com/cloudius-systems/osv/issues/1179 Signed-off-by: Waldemar Kozaczuk --- include/ext4_bcache.h | 46 +++++++++++++++++++------------------------ include/ext4_fs.h | 9 +++++++++ src/ext4_balloc.c | 40 ++++++++++++++++++++++++++++++++++--- src/ext4_bcache.c | 25 +++++++++++++++++++++++ src/ext4_blockdev.c | 16 +++++++++++++-- src/ext4_fs.c | 9 ++++++--- src/ext4_ialloc.c | 23 ++++++++++++++++++++-- src/ext4_trans.c | 5 ++--- 8 files changed, 134 insertions(+), 39 deletions(-) diff --git a/include/ext4_bcache.h b/include/ext4_bcache.h index de12bd5d..5f91bf25 100644 --- a/include/ext4_bcache.h +++ b/include/ext4_bcache.h @@ -65,10 +65,26 @@ struct ext4_block { struct ext4_bcache; +/**@brief buffer state bits + * + * - BC_UPTODATE: Buffer contains valid data. + * - BC_DIRTY: Buffer is dirty. + * - BC_FLUSH: Buffer will be immediately flushed, + * when no one references it. + * - BC_TMP: Buffer will be dropped once its refctr + * reaches zero. + */ +enum bcache_state_bits { + BC_UPTODATE, // Mostly set in ext4_block_get() after reading from disk + BC_DIRTY, // Most importantly set in ext4_bcache_set_dirty() + BC_FLUSH, // Set in journal code + BC_TMP // Set in journal code +}; + /**@brief Single block descriptor*/ struct ext4_buf { /**@brief Flags*/ - int flags; + char flags[BC_TMP + 1]; /**@brief Logical block address*/ uint64_t lba; @@ -148,30 +164,14 @@ struct ext4_bcache { SLIST_HEAD(ext4_buf_dirty, ext4_buf) dirty_list; }; -/**@brief buffer state bits - * - * - BC♡UPTODATE: Buffer contains valid data. - * - BC_DIRTY: Buffer is dirty. - * - BC_FLUSH: Buffer will be immediately flushed, - * when no one references it. - * - BC_TMP: Buffer will be dropped once its refctr - * reaches zero. - */ -enum bcache_state_bits { - BC_UPTODATE, - BC_DIRTY, - BC_FLUSH, - BC_TMP -}; - #define ext4_bcache_set_flag(buf, b) \ - (buf)->flags |= 1 << (b) + (buf)->flags[b] = 1 #define ext4_bcache_clear_flag(buf, b) \ - (buf)->flags &= ~(1 << (b)) + (buf)->flags[b] = 0 #define ext4_bcache_test_flag(buf, b) \ - (((buf)->flags & (1 << (b))) >> (b)) + (((buf)->flags[b] == 1 )) static inline void ext4_bcache_set_dirty(struct ext4_buf *buf) { ext4_bcache_set_flag(buf, BC_UPTODATE); @@ -239,12 +239,6 @@ struct ext4_buf *ext4_buf_lowest_lru(struct ext4_bcache *bc); * @param buf buffer*/ void ext4_bcache_drop_buf(struct ext4_bcache *bc, struct ext4_buf *buf); -/**@brief Invalidate a buffer. - * @param bc block cache descriptor - * @param buf buffer*/ -void ext4_bcache_invalidate_buf(struct ext4_bcache *bc, - struct ext4_buf *buf); - /**@brief Invalidate a range of buffers. * @param bc block cache descriptor * @param from starting lba diff --git a/include/ext4_fs.h b/include/ext4_fs.h index b76be6ca..480bca9b 100644 --- a/include/ext4_fs.h +++ b/include/ext4_fs.h @@ -67,6 +67,15 @@ struct ext4_fs { struct jbd_fs *jbd_fs; struct jbd_journal *jbd_journal; struct jbd_trans *curr_trans; + + void (*inode_alloc_lock)(); + void (*inode_alloc_unlock)(); + + void (*block_alloc_lock)(); + void (*block_alloc_unlock)(); + + void (*bcache_lock)(); + void (*bcache_unlock)(); }; struct ext4_block_group_ref { diff --git a/src/ext4_balloc.c b/src/ext4_balloc.c index 7984e7c0..f0088937 100644 --- a/src/ext4_balloc.c +++ b/src/ext4_balloc.c @@ -149,7 +149,8 @@ ext4_balloc_verify_bitmap_csum(struct ext4_sblock *sb, #define ext4_balloc_verify_bitmap_csum(...) true #endif -int ext4_balloc_free_block(struct ext4_inode_ref *inode_ref, ext4_fsblk_t baddr) +static int +__ext4_balloc_free_block(struct ext4_inode_ref *inode_ref, ext4_fsblk_t baddr) { struct ext4_fs *fs = inode_ref->fs; struct ext4_sblock *sb = &fs->sb; @@ -223,14 +224,25 @@ int ext4_balloc_free_block(struct ext4_inode_ref *inode_ref, ext4_fsblk_t baddr) ext4_fs_put_block_group_ref(&bg_ref); return rc; } + fs->bcache_lock(); ext4_bcache_invalidate_lba(fs->bdev->bc, baddr, 1); + fs->bcache_unlock(); /* Release block group reference */ rc = ext4_fs_put_block_group_ref(&bg_ref); return rc; } -int ext4_balloc_free_blocks(struct ext4_inode_ref *inode_ref, +int ext4_balloc_free_block(struct ext4_inode_ref *inode_ref, ext4_fsblk_t baddr) +{ + inode_ref->fs->block_alloc_lock(); + int rc = __ext4_balloc_free_block(inode_ref, baddr); + inode_ref->fs->block_alloc_unlock(); + return rc; +} + +static int +__ext4_balloc_free_blocks(struct ext4_inode_ref *inode_ref, ext4_fsblk_t first, uint32_t count) { int rc = EOK; @@ -342,14 +354,26 @@ int ext4_balloc_free_blocks(struct ext4_inode_ref *inode_ref, } + fs->bcache_lock(); ext4_bcache_invalidate_lba(fs->bdev->bc, start_block, blk_cnt); + fs->bcache_unlock(); /*All blocks should be released*/ ext4_assert(count == 0); return rc; } -int ext4_balloc_alloc_block(struct ext4_inode_ref *inode_ref, +int ext4_balloc_free_blocks(struct ext4_inode_ref *inode_ref, + ext4_fsblk_t first, uint32_t count) +{ + inode_ref->fs->block_alloc_lock(); + int rc = __ext4_balloc_free_blocks(inode_ref, first, count); + inode_ref->fs->block_alloc_unlock(); + return rc; +} + +static int +__ext4_balloc_alloc_block(struct ext4_inode_ref *inode_ref, ext4_fsblk_t goal, ext4_fsblk_t *fblock) { @@ -582,6 +606,16 @@ int ext4_balloc_alloc_block(struct ext4_inode_ref *inode_ref, return r; } +int ext4_balloc_alloc_block(struct ext4_inode_ref *inode_ref, + ext4_fsblk_t goal, + ext4_fsblk_t *fblock) +{ + inode_ref->fs->block_alloc_lock(); + int rc = __ext4_balloc_alloc_block(inode_ref, goal, fblock); + inode_ref->fs->block_alloc_unlock(); + return rc; +} + int ext4_balloc_try_alloc_block(struct ext4_inode_ref *inode_ref, ext4_fsblk_t baddr, bool *free) { diff --git a/src/ext4_bcache.c b/src/ext4_bcache.c index 9d3c7fbd..e8905320 100644 --- a/src/ext4_bcache.c +++ b/src/ext4_bcache.c @@ -153,11 +153,19 @@ ext4_buf_lookup(struct ext4_bcache *bc, uint64_t lba) return RB_FIND(ext4_buf_lba, &bc->lba_root, &tmp); } +// No need for locking +// Called by ext4_block_cache_shake() ONLY +// PROTECTED in ext4_block_get_noread() because the only +// caller - ext4_block_cache_shake() - is protected struct ext4_buf *ext4_buf_lowest_lru(struct ext4_bcache *bc) { return RB_MIN(ext4_buf_lru, &bc->lru_root); } +// No need for locking +// Called by ext4_block_cache_shake() and functions in this file +// PROTECTED in ext4_block_get_noread() because the +// caller - ext4_block_cache_shake() - is protected void ext4_bcache_drop_buf(struct ext4_bcache *bc, struct ext4_buf *buf) { /* Warn on dropping any referenced buffers.*/ @@ -178,6 +186,10 @@ void ext4_bcache_drop_buf(struct ext4_bcache *bc, struct ext4_buf *buf) bc->ref_blocks--; } +/**@brief Invalidate a buffer. + * @param bc block cache descriptor + * @param buf buffer*/ +static void ext4_bcache_invalidate_buf(struct ext4_bcache *bc, struct ext4_buf *buf) { @@ -191,6 +203,9 @@ void ext4_bcache_invalidate_buf(struct ext4_bcache *bc, ext4_bcache_clear_dirty(buf); } +// Called by ext4_balloc_free_block/s() +// Protect by LOCK +// PROTECTED in the callers void ext4_bcache_invalidate_lba(struct ext4_bcache *bc, uint64_t from, uint32_t cnt) @@ -205,6 +220,10 @@ void ext4_bcache_invalidate_lba(struct ext4_bcache *bc, } } +// Called by: +// - ext4_bcache_alloc() - No need to lock (see below) +// - ext4_block_flush_lba() - probably need to lock there - PROTECTED +// - 2 methods in ext4_journal() not used right now - ignore struct ext4_buf * ext4_bcache_find_get(struct ext4_bcache *bc, struct ext4_block *b, uint64_t lba) @@ -231,6 +250,10 @@ ext4_bcache_find_get(struct ext4_bcache *bc, struct ext4_block *b, return buf; } +// Called by ext4_block_get_noread() ONLY which also calls ext4_block_cache_shake() +// which calls numbers of function here. So no need for locking, +// and maybe we should lock in ext4_block_get_noread()? +// PROTECTED in ext4_block_get_noread() int ext4_bcache_alloc(struct ext4_bcache *bc, struct ext4_block *b, bool *is_new) { @@ -267,6 +290,8 @@ int ext4_bcache_alloc(struct ext4_bcache *bc, struct ext4_block *b, return EOK; } +// Called by 3 functions - ext4_block_flush_lba(), ext4_block_get() and ext4_block_set() +// Protected there int ext4_bcache_free(struct ext4_bcache *bc, struct ext4_block *b) { struct ext4_buf *buf = b->buf; diff --git a/src/ext4_blockdev.c b/src/ext4_blockdev.c index c01093ad..916d894a 100644 --- a/src/ext4_blockdev.c +++ b/src/ext4_blockdev.c @@ -170,16 +170,19 @@ int ext4_block_flush_buf(struct ext4_blockdev *bdev, struct ext4_buf *buf) return EOK; } +//Used indirectly by journal-ing code int ext4_block_flush_lba(struct ext4_blockdev *bdev, uint64_t lba) { int r = EOK; struct ext4_buf *buf; struct ext4_block b; + bdev->fs->bcache_lock(); buf = ext4_bcache_find_get(bdev->bc, &b, lba); if (buf) { r = ext4_block_flush_buf(bdev, buf); ext4_bcache_free(bdev->bc, &b); } + bdev->fs->bcache_unlock(); return r; } @@ -244,19 +247,24 @@ int ext4_block_get_noread(struct ext4_blockdev *bdev, struct ext4_block *b, int ext4_block_get(struct ext4_blockdev *bdev, struct ext4_block *b, uint64_t lba) { + bdev->fs->bcache_lock(); int r = ext4_block_get_noread(bdev, b, lba); - if (r != EOK) + if (r != EOK) { + bdev->fs->bcache_unlock(); return r; + } if (ext4_bcache_test_flag(b->buf, BC_UPTODATE)) { /* Data in the cache is up-to-date. * Reading from physical device is not required */ + bdev->fs->bcache_unlock(); return EOK; } r = ext4_blocks_get_direct(bdev, b->data, lba, 1); if (r != EOK) { ext4_bcache_free(bdev->bc, b); + bdev->fs->bcache_unlock(); b->lb_id = 0; return r; } @@ -264,6 +272,7 @@ int ext4_block_get(struct ext4_blockdev *bdev, struct ext4_block *b, /* Mark buffer up-to-date, since * fresh data is read from physical device just now. */ ext4_bcache_set_flag(b->buf, BC_UPTODATE); + bdev->fs->bcache_unlock(); return EOK; } @@ -275,7 +284,10 @@ int ext4_block_set(struct ext4_blockdev *bdev, struct ext4_block *b) if (!bdev->bdif->ph_refctr) return EIO; - return ext4_bcache_free(bdev->bc, b); + bdev->fs->bcache_lock(); + int rc = ext4_bcache_free(bdev->bc, b); + bdev->fs->bcache_unlock(); + return rc; } int ext4_blocks_get_direct(struct ext4_blockdev *bdev, void *buf, uint64_t lba, diff --git a/src/ext4_fs.c b/src/ext4_fs.c index c7a99e7b..26d25a02 100644 --- a/src/ext4_fs.c +++ b/src/ext4_fs.c @@ -1657,8 +1657,10 @@ int ext4_fs_append_inode_dblk(struct ext4_inode_ref *inode_ref, if (rc != EOK) return rc; - *fblock = current_fsblk; - ext4_assert(*fblock); + if (fblock) { + *fblock = current_fsblk; + ext4_assert(*fblock); + } ext4_inode_set_size(inode_ref->inode, inode_size + block_size); inode_ref->dirty = true; @@ -1702,7 +1704,8 @@ int ext4_fs_append_inode_dblk(struct ext4_inode_ref *inode_ref, ext4_inode_set_size(inode_ref->inode, inode_size + block_size); inode_ref->dirty = true; - *fblock = phys_block; + if (fblock) + *fblock = phys_block; *iblock = new_block_idx; return EOK; diff --git a/src/ext4_ialloc.c b/src/ext4_ialloc.c index f2c796fd..196fdd27 100644 --- a/src/ext4_ialloc.c +++ b/src/ext4_ialloc.c @@ -110,6 +110,7 @@ static uint32_t ext4_ialloc_bitmap_csum(struct ext4_sblock *sb, void *bitmap) #define ext4_ialloc_bitmap_csum(...) 0 #endif +//TODO: Check this one when called by ext4_fs_get_block_group_ref() void ext4_ialloc_set_bitmap_csum(struct ext4_sblock *sb, struct ext4_bgroup *bg, void *bitmap __unused) { @@ -155,7 +156,8 @@ ext4_ialloc_verify_bitmap_csum(struct ext4_sblock *sb, struct ext4_bgroup *bg, #define ext4_ialloc_verify_bitmap_csum(...) true #endif -int ext4_ialloc_free_inode(struct ext4_fs *fs, uint32_t index, bool is_dir) +static int +__ext4_ialloc_free_inode(struct ext4_fs *fs, uint32_t index, bool is_dir) { struct ext4_sblock *sb = &fs->sb; @@ -226,7 +228,16 @@ int ext4_ialloc_free_inode(struct ext4_fs *fs, uint32_t index, bool is_dir) return EOK; } -int ext4_ialloc_alloc_inode(struct ext4_fs *fs, uint32_t *idx, bool is_dir) +int ext4_ialloc_free_inode(struct ext4_fs *fs, uint32_t index, bool is_dir) +{ + fs->inode_alloc_lock(); + int rc = __ext4_ialloc_free_inode(fs, index, is_dir); + fs->inode_alloc_unlock(); + return rc; +} + +static int +__ext4_ialloc_alloc_inode(struct ext4_fs *fs, uint32_t *idx, bool is_dir) { struct ext4_sblock *sb = &fs->sb; @@ -365,6 +376,14 @@ int ext4_ialloc_alloc_inode(struct ext4_fs *fs, uint32_t *idx, bool is_dir) return ENOSPC; } +int ext4_ialloc_alloc_inode(struct ext4_fs *fs, uint32_t *idx, bool is_dir) +{ + fs->inode_alloc_lock(); + int rc = __ext4_ialloc_alloc_inode(fs, idx, is_dir); + fs->inode_alloc_unlock(); + return rc; +} + /** * @} */ diff --git a/src/ext4_trans.c b/src/ext4_trans.c index f2287518..3e888b62 100644 --- a/src/ext4_trans.c +++ b/src/ext4_trans.c @@ -68,10 +68,9 @@ int ext4_trans_block_get_noread(struct ext4_blockdev *bdev, struct ext4_block *b, uint64_t lba) { + bdev->fs->bcache_lock(); int r = ext4_block_get_noread(bdev, b, lba); - if (r != EOK) - return r; - + bdev->fs->bcache_unlock(); return r; }