On 2019/11/25 下午10:40, Josef Bacik wrote: > inc_block_group_ro does a calculation to see if we have enough room left > over if we mark this block group as read only in order to see if it's ok > to mark the block group as read only. > > The problem is this calculation _only_ works for data, where our used is > always less than our total. For metadata we will overcommit, so this > will almost always fail for metadata. > > Fix this by exporting btrfs_can_overcommit, and then see if we have > enough space to remove the remaining free space in the block group we > are trying to mark read only. If we do then we can mark this block > group as read only. This patch is indeed much better than my naive RFC patches. Instead of reducing over commit threshold, this just increase the check to can_overcommit(), brilliant. However some small nitpicks below. > > Signed-off-by: Josef Bacik > --- > fs/btrfs/block-group.c | 37 ++++++++++++++++++++++++++----------- > fs/btrfs/space-info.c | 19 ++++++++++--------- > fs/btrfs/space-info.h | 3 +++ > 3 files changed, 39 insertions(+), 20 deletions(-) > > diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c > index 3ffbc2e0af21..7b1f6d2b9621 100644 > --- a/fs/btrfs/block-group.c > +++ b/fs/btrfs/block-group.c > @@ -1184,7 +1184,6 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force) Now @force parameter is not used any more. We can have a cleanup patch for it. > { > struct btrfs_space_info *sinfo = cache->space_info; > u64 num_bytes; > - u64 sinfo_used; > int ret = -ENOSPC; > > spin_lock(&sinfo->lock); > @@ -1200,19 +1199,38 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force) > > num_bytes = cache->length - cache->reserved - cache->pinned - > cache->bytes_super - cache->used; > - sinfo_used = btrfs_space_info_used(sinfo, true); > > /* > - * sinfo_used + num_bytes should always <= sinfo->total_bytes. > - * > - * Here we make sure if we mark this bg RO, we still have enough > - * free space as buffer. > + * Data never overcommits, even in mixed mode, so do just the straight > + * check of left over space in how much we have allocated. > */ > - if (sinfo_used + num_bytes + sinfo->total_bytes) { > + if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA) { > + u64 sinfo_used = btrfs_space_info_used(sinfo, true); > + > + /* > + * sinfo_used + num_bytes should always <= sinfo->total_bytes. > + * > + * Here we make sure if we mark this bg RO, we still have enough > + * free space as buffer. > + */ > + if (sinfo_used + num_bytes + sinfo->total_bytes) The same code copied from patch 3 I guess? The same typo. > + ret = 0; > + } else { > + /* > + * We overcommit metadata, so we need to do the > + * btrfs_can_overcommit check here, and we need to pass in > + * BTRFS_RESERVE_NO_FLUSH to give ourselves the most amount of > + * leeway to allow us to mark this block group as read only. > + */ > + if (btrfs_can_overcommit(cache->fs_info, sinfo, num_bytes, > + BTRFS_RESERVE_NO_FLUSH)) > + ret = 0; > + } For metadata chunks, allocating a new chunk won't change how we do over commit calculation. I guess we can skip the chunk allocation for metadata chunks in btrfs_inc_block_group_ro()? Thanks, Qu > + > + if (!ret) { > sinfo->bytes_readonly += num_bytes; > cache->ro++; > list_add_tail(&cache->ro_list, &sinfo->ro_bgs); > - ret = 0; > } > out: > spin_unlock(&cache->lock); > @@ -1220,9 +1238,6 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force) > if (ret == -ENOSPC && btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG)) { > btrfs_info(cache->fs_info, > "unable to make block group %llu ro", cache->start); > - btrfs_info(cache->fs_info, > - "sinfo_used=%llu bg_num_bytes=%llu", > - sinfo_used, num_bytes); > btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0); > } > return ret; > diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c > index df5fb68df798..01297c5b2666 100644 > --- a/fs/btrfs/space-info.c > +++ b/fs/btrfs/space-info.c > @@ -159,9 +159,9 @@ static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global) > return (global->size << 1); > } > > -static int can_overcommit(struct btrfs_fs_info *fs_info, > - struct btrfs_space_info *space_info, u64 bytes, > - enum btrfs_reserve_flush_enum flush) > +int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, > + struct btrfs_space_info *space_info, u64 bytes, > + enum btrfs_reserve_flush_enum flush) > { > u64 profile; > u64 avail; > @@ -226,7 +226,8 @@ void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info, > > /* Check and see if our ticket can be satisified now. */ > if ((used + ticket->bytes <= space_info->total_bytes) || > - can_overcommit(fs_info, space_info, ticket->bytes, flush)) { > + btrfs_can_overcommit(fs_info, space_info, ticket->bytes, > + flush)) { > btrfs_space_info_update_bytes_may_use(fs_info, > space_info, > ticket->bytes); > @@ -639,14 +640,14 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, > return to_reclaim; > > to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); > - if (can_overcommit(fs_info, space_info, to_reclaim, > - BTRFS_RESERVE_FLUSH_ALL)) > + if (btrfs_can_overcommit(fs_info, space_info, to_reclaim, > + BTRFS_RESERVE_FLUSH_ALL)) > return 0; > > used = btrfs_space_info_used(space_info, true); > > - if (can_overcommit(fs_info, space_info, SZ_1M, > - BTRFS_RESERVE_FLUSH_ALL)) > + if (btrfs_can_overcommit(fs_info, space_info, SZ_1M, > + BTRFS_RESERVE_FLUSH_ALL)) > expected = div_factor_fine(space_info->total_bytes, 95); > else > expected = div_factor_fine(space_info->total_bytes, 90); > @@ -1005,7 +1006,7 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info, > */ > if (!pending_tickets && > ((used + orig_bytes <= space_info->total_bytes) || > - can_overcommit(fs_info, space_info, orig_bytes, flush))) { > + btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) { > btrfs_space_info_update_bytes_may_use(fs_info, space_info, > orig_bytes); > ret = 0; > diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h > index 1a349e3f9cc1..24514cd2c6c1 100644 > --- a/fs/btrfs/space-info.h > +++ b/fs/btrfs/space-info.h > @@ -127,6 +127,9 @@ int btrfs_reserve_metadata_bytes(struct btrfs_root *root, > enum btrfs_reserve_flush_enum flush); > void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info, > struct btrfs_space_info *space_info); > +int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, > + struct btrfs_space_info *space_info, u64 bytes, > + enum btrfs_reserve_flush_enum flush); > > static inline void btrfs_space_info_free_bytes_may_use( > struct btrfs_fs_info *fs_info, >