Skip to content

Commit

Permalink
btrfs: zoned: finish least available block group on data bg allocation
Browse files Browse the repository at this point in the history
When we run out of active zones and no sufficient space is left in any
block groups, we need to finish one block group to make room to activate a
new block group.

However, we cannot do this for metadata block groups because we can cause a
deadlock by waiting for a running transaction commit. So, do that only for
a data block group.

Furthermore, the block group to be finished has two requirements. First,
the block group must not have reserved bytes left. Having reserved bytes
means we have an allocated region but did not yet send bios for it. If that
region is allocated by the thread calling btrfs_zone_finish(), it results
in a deadlock.

Second, the block group to be finished must not be a SYSTEM block
group. Finishing a SYSTEM block group easily breaks further chunk
allocation by nullifying the SYSTEM free space.

In a certain case, we cannot find any zone finish candidate or
btrfs_zone_finish() may fail. In that case, we fall back to split the
allocation bytes and fill the last spaces left in the block groups.

CC: [email protected] # 5.16+
Fixes: afba2bc ("btrfs: zoned: implement active zone tracking")
Reviewed-by: Johannes Thumshirn <[email protected]>
Signed-off-by: Naohiro Aota <[email protected]>
Signed-off-by: David Sterba <[email protected]>
  • Loading branch information
naota authored and kdave committed Jul 25, 2022
1 parent bb9950d commit 393f646
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 10 deletions.
50 changes: 40 additions & 10 deletions fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -3965,23 +3965,53 @@ static void found_extent(struct find_free_extent_ctl *ffe_ctl,
}
}

static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
struct find_free_extent_ctl *ffe_ctl)
{
/* If we can activate new zone, just allocate a chunk and use it */
if (btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
return 0;

/*
* We already reached the max active zones. Try to finish one block
* group to make a room for a new block group. This is only possible
* for a data block group because btrfs_zone_finish() may need to wait
* for a running transaction which can cause a deadlock for metadata
* allocation.
*/
if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
int ret = btrfs_zone_finish_one_bg(fs_info);

if (ret == 1)
return 0;
else if (ret < 0)
return ret;
}

/*
* If we have enough free space left in an already active block group
* and we can't activate any other zone now, do not allow allocating a
* new chunk and let find_free_extent() retry with a smaller size.
*/
if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size)
return -ENOSPC;

/*
* We cannot activate a new block group and no enough space left in any
* block groups. So, allocating a new block group may not help. But,
* there is nothing to do anyway, so let's go with it.
*/
return 0;
}

static int can_allocate_chunk(struct btrfs_fs_info *fs_info,
struct find_free_extent_ctl *ffe_ctl)
{
switch (ffe_ctl->policy) {
case BTRFS_EXTENT_ALLOC_CLUSTERED:
return 0;
case BTRFS_EXTENT_ALLOC_ZONED:
/*
* If we have enough free space left in an already
* active block group and we can't activate any other
* zone now, do not allow allocating a new chunk and
* let find_free_extent() retry with a smaller size.
*/
if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size &&
!btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
return -ENOSPC;
return 0;
return can_allocate_chunk_zoned(fs_info, ffe_ctl);
default:
BUG();
}
Expand Down
40 changes: 40 additions & 0 deletions fs/btrfs/zoned.c
Original file line number Diff line number Diff line change
Expand Up @@ -2180,3 +2180,43 @@ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logica
spin_unlock(&block_group->lock);
btrfs_put_block_group(block_group);
}

int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
{
struct btrfs_block_group *block_group;
struct btrfs_block_group *min_bg = NULL;
u64 min_avail = U64_MAX;
int ret;

spin_lock(&fs_info->zone_active_bgs_lock);
list_for_each_entry(block_group, &fs_info->zone_active_bgs,
active_bg_list) {
u64 avail;

spin_lock(&block_group->lock);
if (block_group->reserved ||
(block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)) {
spin_unlock(&block_group->lock);
continue;
}

avail = block_group->zone_capacity - block_group->alloc_offset;
if (min_avail > avail) {
if (min_bg)
btrfs_put_block_group(min_bg);
min_bg = block_group;
min_avail = avail;
btrfs_get_block_group(min_bg);
}
spin_unlock(&block_group->lock);
}
spin_unlock(&fs_info->zone_active_bgs_lock);

if (!min_bg)
return 0;

ret = btrfs_zone_finish(min_bg);
btrfs_put_block_group(min_bg);

return ret < 0 ? ret : 1;
}
7 changes: 7 additions & 0 deletions fs/btrfs/zoned.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info);
bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info);
void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
u64 length);
int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info);
#else /* CONFIG_BLK_DEV_ZONED */
static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
struct blk_zone *zone)
Expand Down Expand Up @@ -249,6 +250,12 @@ static inline bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info)

static inline void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info,
u64 logical, u64 length) { }

static inline int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
{
return 1;
}

#endif

static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
Expand Down

0 comments on commit 393f646

Please sign in to comment.