On 2018/12/3 11:20 PM, Josef Bacik wrote:
> Now with the delayed_refs_rsv we can now know exactly how much pending
> delayed refs space we need.  This means we can drastically simplify
> btrfs_check_space_for_delayed_refs by simply checking how much space we
> have reserved for the global rsv (which acts as a spill over buffer) and
> the delayed refs rsv.  If our total size is beyond that amount then we
> know it's time to commit the transaction and stop any more delayed refs
> from being generated.

This patch causes an obvious and large performance regression for
metadata relocation; a bisect leads to this patch.

I'm using a script that copies /usr (around 3.5G) into a subvolume,
creates 16 snapshots of it, touches 3 random files in each snapshot,
and then runs a *metadata* balance.

Please note that quota is *DISABLED* here.

The full script can be found here:
https://gist.github.com/adam900710/e0a9719441e770a4d0d7b32c4a88bf95
(A rough sketch of the workflow is also appended below the quoted patch.)

Before this patch, it takes around 5s to relocate 3 metadata block
groups (the VM is using unsafe cache mode, so IO is as fast as memory).

After this patch, I don't know how long it will take, as it doesn't even
finish before I reset the VM.

I also found that during the super slow relocation the generation of the
fs increases like crazy, so something is definitely causing a ton of
transaction commits.

Thanks,
Qu

>
> Signed-off-by: Josef Bacik
> ---
>  fs/btrfs/ctree.h       |  2 +-
>  fs/btrfs/extent-tree.c | 48 ++++++++++++++++++------------------------------
>  fs/btrfs/inode.c       |  4 ++--
>  fs/btrfs/transaction.c |  2 +-
>  4 files changed, 22 insertions(+), 34 deletions(-)
>
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 2eba398c722b..30da075c042e 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -2631,7 +2631,7 @@ static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_fs_info *fs_info,
>  }
>
>  int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans);
> -int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans);
> +bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info);
>  void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
>                                          const u64 start);
>  void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg);
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 5a2d0b061f57..07ef1b8087f7 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -2839,40 +2839,28 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
>          return num_csums;
>  }
>
> -int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans)
> +bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
>  {
> -        struct btrfs_fs_info *fs_info = trans->fs_info;
> -        struct btrfs_block_rsv *global_rsv;
> -        u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
> -        u64 csum_bytes = trans->transaction->delayed_refs.pending_csums;
> -        unsigned int num_dirty_bgs = trans->transaction->num_dirty_bgs;
> -        u64 num_bytes, num_dirty_bgs_bytes;
> -        int ret = 0;
> +        struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
> +        struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
> +        bool ret = false;
> +        u64 reserved;
>
> -        num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
> -        num_heads = heads_to_leaves(fs_info, num_heads);
> -        if (num_heads > 1)
> -                num_bytes += (num_heads - 1) * fs_info->nodesize;
> -        num_bytes <<= 1;
> -        num_bytes += btrfs_csum_bytes_to_leaves(fs_info, csum_bytes) *
> -                fs_info->nodesize;
> -        num_dirty_bgs_bytes = btrfs_calc_trans_metadata_size(fs_info,
> -                                                             num_dirty_bgs);
> -        global_rsv = &fs_info->global_block_rsv;
> +        spin_lock(&global_rsv->lock);
> +        reserved = global_rsv->reserved;
> +        spin_unlock(&global_rsv->lock);
>
>          /*
> -         * If we can't allocate any more chunks lets make sure we have _lots_ of
> -         * wiggle room since running delayed refs can create more delayed refs.
> +         * Since the global reserve is just kind of magic we don't really want
> +         * to rely on it to save our bacon, so if our size is more than the
> +         * delayed_refs_rsv and the global rsv then it's time to think about
> +         * bailing.
>           */
> -        if (global_rsv->space_info->full) {
> -                num_dirty_bgs_bytes <<= 1;
> -                num_bytes <<= 1;
> -        }
> -
> -        spin_lock(&global_rsv->lock);
> -        if (global_rsv->reserved <= num_bytes + num_dirty_bgs_bytes)
> -                ret = 1;
> -        spin_unlock(&global_rsv->lock);
> +        spin_lock(&delayed_refs_rsv->lock);
> +        reserved += delayed_refs_rsv->reserved;
> +        if (delayed_refs_rsv->size >= reserved)
> +                ret = true;
> +        spin_unlock(&delayed_refs_rsv->lock);
>          return ret;
>  }
>
> @@ -2891,7 +2879,7 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
>          if (val >= NSEC_PER_SEC / 2)
>                  return 2;
>
> -        return btrfs_check_space_for_delayed_refs(trans);
> +        return btrfs_check_space_for_delayed_refs(trans->fs_info);
>  }
>
>  struct async_delayed_refs {
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index a097f5fde31d..8532a2eb56d1 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -5326,8 +5326,8 @@ static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
>           * Try to steal from the global reserve if there is space for
>           * it.
>           */
> -        if (!btrfs_check_space_for_delayed_refs(trans) &&
> -            !btrfs_block_rsv_migrate(global_rsv, rsv, rsv->size, false))
> +        if (!btrfs_check_space_for_delayed_refs(fs_info) &&
> +            !btrfs_block_rsv_migrate(global_rsv, rsv, rsv->size, 0))
>                  return trans;
>
>          /* If not, commit and try again. */
> diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
> index a21c4defad92..2d8401bf8df9 100644
> --- a/fs/btrfs/transaction.c
> +++ b/fs/btrfs/transaction.c
> @@ -789,7 +789,7 @@ static int should_end_transaction(struct btrfs_trans_handle *trans)
>  {
>          struct btrfs_fs_info *fs_info = trans->fs_info;
>
> -        if (btrfs_check_space_for_delayed_refs(trans))
> +        if (btrfs_check_space_for_delayed_refs(fs_info))
>                  return 1;
>
>          return !!btrfs_block_rsv_check(&fs_info->global_block_rsv, 5);
>
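
For convenience, here is a minimal sketch of the kind of workflow described
above. It is not the exact gist script; the device (/dev/vdb), mount point
(/mnt/test) and the way the 3 random files are picked are just placeholders:

#!/bin/bash
# Rough reproducer sketch, assumed device/mount point, not the real gist script.
dev=/dev/vdb
mnt=/mnt/test

mkfs.btrfs -f "$dev"
mount "$dev" "$mnt"

# Populate a subvolume with ~3.5G of metadata-heavy data.
btrfs subvolume create "$mnt/src"
cp -a /usr/. "$mnt/src/"

# Create 16 snapshots and touch 3 random files in each of them.
for i in $(seq 1 16); do
        btrfs subvolume snapshot "$mnt/src" "$mnt/snap_$i"
        find "$mnt/snap_$i" -type f | shuf -n 3 | while read -r f; do
                touch "$f"
        done
done
sync

# Quota stays disabled; relocate metadata block groups only.
btrfs balance start -m "$mnt"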