Linux-BTRFS Archive on lore.kernel.org
 help / color / Atom feed
From: Nikolay Borisov <nborisov@suse.com>
To: Josef Bacik <josef@toxicpanda.com>,
	linux-btrfs@vger.kernel.org, kernel-team@fb.com
Subject: Re: [PATCH 5/8] btrfs: don't enospc all tickets on flush failure
Date: Mon, 26 Nov 2018 14:25:52 +0200
Message-ID: <ccd81359-764f-6651-a738-ba3b3843b9b3@suse.com> (raw)
In-Reply-To: <20181121190313.24575-6-josef@toxicpanda.com>



On 21.11.18 г. 21:03 ч., Josef Bacik wrote:
> With the introduction of the per-inode block_rsv it became possible to
> have really really large reservation requests made because of data
> fragmentation.  Since the ticket stuff assumed that we'd always have
> relatively small reservation requests it just killed all tickets if we
> were unable to satisfy the current request.  However this is generally
> not the case anymore.  So fix this logic to instead see if we had a
> ticket that we were able to give some reservation to, and if we were
> continue the flushing loop again.  Likewise we make the tickets use the
> space_info_add_old_bytes() method of returning what reservation they did
> receive in hopes that it could satisfy reservations down the line.


The logic of the patch can be summarised as follows:

If no progress is made for a ticket, then start failing all tickets until
the first one that has progress made on its reservation (inclusive). In
this case this first ticket will be failed, but at least its space will
be reused via space_info_add_old_bytes().

Frankly, this seems really arbitrary.

> 
> Signed-off-by: Josef Bacik <josef@toxicpanda.com>
> ---
>  fs/btrfs/extent-tree.c | 45 +++++++++++++++++++++++++--------------------
>  1 file changed, 25 insertions(+), 20 deletions(-)
> 
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index e6bb6ce23c84..983d086fa768 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -4791,6 +4791,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
>  }
>  
>  struct reserve_ticket {
> +	u64 orig_bytes;
>  	u64 bytes;
>  	int error;
>  	struct list_head list;
> @@ -5012,7 +5013,7 @@ static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
>  		!test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
>  }
>  
> -static void wake_all_tickets(struct list_head *head)
> +static bool wake_all_tickets(struct list_head *head)
>  {
>  	struct reserve_ticket *ticket;
>  
> @@ -5021,7 +5022,10 @@ static void wake_all_tickets(struct list_head *head)
>  		list_del_init(&ticket->list);
>  		ticket->error = -ENOSPC;
>  		wake_up(&ticket->wait);
> +		if (ticket->bytes != ticket->orig_bytes)
> +			return true;
>  	}
> +	return false;
>  }
>  
>  /*
> @@ -5089,8 +5093,12 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
>  		if (flush_state > COMMIT_TRANS) {
>  			commit_cycles++;
>  			if (commit_cycles > 2) {
> -				wake_all_tickets(&space_info->tickets);
> -				space_info->flush = 0;
> +				if (wake_all_tickets(&space_info->tickets)) {
> +					flush_state = FLUSH_DELAYED_ITEMS_NR;
> +					commit_cycles--;
> +				} else {
> +					space_info->flush = 0;
> +				}
>  			} else {
>  				flush_state = FLUSH_DELAYED_ITEMS_NR;
>  			}
> @@ -5142,10 +5150,11 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
>  
>  static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
>  			       struct btrfs_space_info *space_info,
> -			       struct reserve_ticket *ticket, u64 orig_bytes)
> +			       struct reserve_ticket *ticket)
>  
>  {
>  	DEFINE_WAIT(wait);
> +	u64 reclaim_bytes = 0;
>  	int ret = 0;
>  
>  	spin_lock(&space_info->lock);
> @@ -5166,14 +5175,12 @@ static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
>  		ret = ticket->error;
>  	if (!list_empty(&ticket->list))
>  		list_del_init(&ticket->list);
> -	if (ticket->bytes && ticket->bytes < orig_bytes) {
> -		u64 num_bytes = orig_bytes - ticket->bytes;
> -		update_bytes_may_use(space_info, -num_bytes);
> -		trace_btrfs_space_reservation(fs_info, "space_info",
> -					      space_info->flags, num_bytes, 0);
> -	}
> +	if (ticket->bytes && ticket->bytes < ticket->orig_bytes)
> +		reclaim_bytes = ticket->orig_bytes - ticket->bytes;
>  	spin_unlock(&space_info->lock);
>  
> +	if (reclaim_bytes)
> +		space_info_add_old_bytes(fs_info, space_info, reclaim_bytes);
>  	return ret;
>  }
>  
> @@ -5199,6 +5206,7 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
>  {
>  	struct reserve_ticket ticket;
>  	u64 used;
> +	u64 reclaim_bytes = 0;
>  	int ret = 0;
>  
>  	ASSERT(orig_bytes);
> @@ -5234,6 +5242,7 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
>  	 * the list and we will do our own flushing further down.
>  	 */
>  	if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
> +		ticket.orig_bytes = orig_bytes;
>  		ticket.bytes = orig_bytes;
>  		ticket.error = 0;
>  		init_waitqueue_head(&ticket.wait);
> @@ -5274,25 +5283,21 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
>  		return ret;
>  
>  	if (flush == BTRFS_RESERVE_FLUSH_ALL)
> -		return wait_reserve_ticket(fs_info, space_info, &ticket,
> -					   orig_bytes);
> +		return wait_reserve_ticket(fs_info, space_info, &ticket);
>  
>  	ret = 0;
>  	priority_reclaim_metadata_space(fs_info, space_info, &ticket);
>  	spin_lock(&space_info->lock);
>  	if (ticket.bytes) {
> -		if (ticket.bytes < orig_bytes) {
> -			u64 num_bytes = orig_bytes - ticket.bytes;
> -			update_bytes_may_use(space_info, -num_bytes);
> -			trace_btrfs_space_reservation(fs_info, "space_info",
> -						      space_info->flags,
> -						      num_bytes, 0);
> -
> -		}
> +		if (ticket.bytes < orig_bytes)
> +			reclaim_bytes = orig_bytes - ticket.bytes;
>  		list_del_init(&ticket.list);
>  		ret = -ENOSPC;
>  	}
>  	spin_unlock(&space_info->lock);
> +
> +	if (reclaim_bytes)
> +		space_info_add_old_bytes(fs_info, space_info, reclaim_bytes);
>  	ASSERT(list_empty(&ticket.list));
>  	return ret;
>  }
> 

  reply index

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-11-21 19:03 [PATCH 0/8] Enospc cleanups and fixes Josef Bacik
2018-11-21 19:03 ` [PATCH 1/8] btrfs: check if free bgs for commit Josef Bacik
2018-11-26 10:45   ` Nikolay Borisov
2018-11-21 19:03 ` [PATCH 2/8] btrfs: dump block_rsv whe dumping space info Josef Bacik
2018-11-21 19:03 ` [PATCH 3/8] btrfs: don't use global rsv for chunk allocation Josef Bacik
2018-11-26 11:25   ` Nikolay Borisov
2018-11-21 19:03 ` [PATCH 4/8] btrfs: add ALLOC_CHUNK_FORCE to the flushing code Josef Bacik
2018-11-26 11:28   ` Nikolay Borisov
2018-11-21 19:03 ` [PATCH 5/8] btrfs: don't enospc all tickets on flush failure Josef Bacik
2018-11-26 12:25   ` Nikolay Borisov [this message]
2018-11-27 19:46     ` Josef Bacik
2018-11-28  8:11       ` Nikolay Borisov
2018-11-21 19:03 ` [PATCH 6/8] btrfs: loop in inode_rsv_refill Josef Bacik
2018-11-21 19:03 ` [PATCH 7/8] btrfs: be more explicit about allowed flush states Josef Bacik
2018-11-26 12:41   ` Nikolay Borisov
2018-11-26 12:45     ` Nikolay Borisov
2018-11-21 19:03 ` [PATCH 8/8] btrfs: reserve extra space during evict() Josef Bacik
2018-12-03 15:24 [PATCH 0/8][V2] Enospc cleanups and fixeS Josef Bacik
2018-12-03 15:24 ` [PATCH 5/8] btrfs: don't enospc all tickets on flush failure Josef Bacik
2018-12-11 14:32   ` Nikolay Borisov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ccd81359-764f-6651-a738-ba3b3843b9b3@suse.com \
    --to=nborisov@suse.com \
    --cc=josef@toxicpanda.com \
    --cc=kernel-team@fb.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-BTRFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-btrfs/0 linux-btrfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-btrfs linux-btrfs/ https://lore.kernel.org/linux-btrfs \
		linux-btrfs@vger.kernel.org linux-btrfs@archiver.kernel.org
	public-inbox-index linux-btrfs


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-btrfs


AGPL code for this site: git clone https://public-inbox.org/ public-inbox