On 2019/11/27 上午12:25, Josef Bacik wrote: > inc_block_group_ro does a calculation to see if we have enough room left > over if we mark this block group as read only in order to see if it's ok > to mark the block group as read only. > > The problem is this calculation _only_ works for data, where our used is > always less than our total. For metadata we will overcommit, so this > will almost always fail for metadata. > > Fix this by exporting btrfs_can_overcommit, and then see if we have > enough space to remove the remaining free space in the block group we > are trying to mark read only. If we do then we can mark this block > group as read only. > > Signed-off-by: Josef Bacik Reviewed-by: Qu Wenruo Just as mentioned in patch 3, it would be better to revert b12de52896c0 ("btrfs: scrub: Don't check free space before marking a block group RO") in this patch too. As your new over-commit check also prevent btrfs_inc_block_group_ro() to create empty system chunk, it solves that mentioned bug in a better way. Thanks, Qu > --- > fs/btrfs/block-group.c | 35 ++++++++++++++++++++++++----------- > fs/btrfs/space-info.c | 19 ++++++++++--------- > fs/btrfs/space-info.h | 3 +++ > 3 files changed, 37 insertions(+), 20 deletions(-) > > diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c > index 5961411500ed..ca55eb6758d1 100644 > --- a/fs/btrfs/block-group.c > +++ b/fs/btrfs/block-group.c > @@ -1184,7 +1184,6 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force) > { > struct btrfs_space_info *sinfo = cache->space_info; > u64 num_bytes; > - u64 sinfo_used; > int ret = -ENOSPC; > > spin_lock(&sinfo->lock); > @@ -1205,19 +1204,36 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force) > > num_bytes = cache->length - cache->reserved - cache->pinned - > cache->bytes_super - cache->used; > - sinfo_used = btrfs_space_info_used(sinfo, true); > > /* > - * sinfo_used + num_bytes should always <= sinfo->total_bytes. > - * > - * Here we make sure if we mark this bg RO, we still have enough > - * free space as buffer. > + * Data never overcommits, even in mixed mode, so do just the straight > + * check of left over space in how much we have allocated. > */ > - if (sinfo_used + num_bytes <= sinfo->total_bytes) { > + if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA) { > + u64 sinfo_used = btrfs_space_info_used(sinfo, true); > + > + /* > + * Here we make sure if we mark this bg RO, we still have enough > + * free space as buffer. > + */ > + if (sinfo_used + num_bytes <= sinfo->total_bytes) > + ret = 0; > + } else { > + /* > + * We overcommit metadata, so we need to do the > + * btrfs_can_overcommit check here, and we need to pass in > + * BTRFS_RESERVE_NO_FLUSH to give ourselves the most amount of > + * leeway to allow us to mark this block group as read only. > + */ > + if (btrfs_can_overcommit(cache->fs_info, sinfo, num_bytes, > + BTRFS_RESERVE_NO_FLUSH)) > + ret = 0; > + } > + > + if (!ret) { > sinfo->bytes_readonly += num_bytes; > cache->ro++; > list_add_tail(&cache->ro_list, &sinfo->ro_bgs); > - ret = 0; > } > out: > spin_unlock(&cache->lock); > @@ -1225,9 +1241,6 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force) > if (ret == -ENOSPC && btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG)) { > btrfs_info(cache->fs_info, > "unable to make block group %llu ro", cache->start); > - btrfs_info(cache->fs_info, > - "sinfo_used=%llu bg_num_bytes=%llu", > - sinfo_used, num_bytes); > btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0); > } > return ret; > diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c > index df5fb68df798..01297c5b2666 100644 > --- a/fs/btrfs/space-info.c > +++ b/fs/btrfs/space-info.c > @@ -159,9 +159,9 @@ static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global) > return (global->size << 1); > } > > -static int can_overcommit(struct btrfs_fs_info *fs_info, > - struct btrfs_space_info *space_info, u64 bytes, > - enum btrfs_reserve_flush_enum flush) > +int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, > + struct btrfs_space_info *space_info, u64 bytes, > + enum btrfs_reserve_flush_enum flush) > { > u64 profile; > u64 avail; > @@ -226,7 +226,8 @@ void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info, > > /* Check and see if our ticket can be satisified now. */ > if ((used + ticket->bytes <= space_info->total_bytes) || > - can_overcommit(fs_info, space_info, ticket->bytes, flush)) { > + btrfs_can_overcommit(fs_info, space_info, ticket->bytes, > + flush)) { > btrfs_space_info_update_bytes_may_use(fs_info, > space_info, > ticket->bytes); > @@ -639,14 +640,14 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, > return to_reclaim; > > to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); > - if (can_overcommit(fs_info, space_info, to_reclaim, > - BTRFS_RESERVE_FLUSH_ALL)) > + if (btrfs_can_overcommit(fs_info, space_info, to_reclaim, > + BTRFS_RESERVE_FLUSH_ALL)) > return 0; > > used = btrfs_space_info_used(space_info, true); > > - if (can_overcommit(fs_info, space_info, SZ_1M, > - BTRFS_RESERVE_FLUSH_ALL)) > + if (btrfs_can_overcommit(fs_info, space_info, SZ_1M, > + BTRFS_RESERVE_FLUSH_ALL)) > expected = div_factor_fine(space_info->total_bytes, 95); > else > expected = div_factor_fine(space_info->total_bytes, 90); > @@ -1005,7 +1006,7 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info, > */ > if (!pending_tickets && > ((used + orig_bytes <= space_info->total_bytes) || > - can_overcommit(fs_info, space_info, orig_bytes, flush))) { > + btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) { > btrfs_space_info_update_bytes_may_use(fs_info, space_info, > orig_bytes); > ret = 0; > diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h > index 1a349e3f9cc1..24514cd2c6c1 100644 > --- a/fs/btrfs/space-info.h > +++ b/fs/btrfs/space-info.h > @@ -127,6 +127,9 @@ int btrfs_reserve_metadata_bytes(struct btrfs_root *root, > enum btrfs_reserve_flush_enum flush); > void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info, > struct btrfs_space_info *space_info); > +int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, > + struct btrfs_space_info *space_info, u64 bytes, > + enum btrfs_reserve_flush_enum flush); > > static inline void btrfs_space_info_free_bytes_may_use( > struct btrfs_fs_info *fs_info, >