linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Liu Bo <bo.li.liu@oracle.com>
To: Josef Bacik <josef@toxicpanda.com>
Cc: hannes@cmpxchg.org, linux-mm@kvack.org,
	akpm@linux-foundation.org, jack@suse.cz,
	linux-fsdevel@vger.kernel.org, kernel-team@fb.com,
	linux-btrfs@vger.kernel.org, Josef Bacik <jbacik@fb.com>
Subject: Re: [PATCH 10/10] btrfs: rework end io for extent buffer reads
Date: Thu, 16 Nov 2017 17:24:53 -0800	[thread overview]
Message-ID: <20171117012453.GG23614@dhcp-whq-twvpn-1-vpnpool-10-159-142-193.vpn.oracle.com> (raw)
In-Reply-To: <1510696616-8489-10-git-send-email-josef@toxicpanda.com>

On Tue, Nov 14, 2017 at 04:56:56PM -0500, Josef Bacik wrote:
> From: Josef Bacik <jbacik@fb.com>
> 
> Now that the only thing that keeps eb's alive is io_pages and its
> refcount we need to hold the eb ref for the entire end io call so we
> don't get it removed out from underneath us.  Also the hooks make no
> sense for us now, so rework this to be cleaner.
> 
> Signed-off-by: Josef Bacik <jbacik@fb.com>
> ---
>  fs/btrfs/disk-io.c   | 63 ++++--------------------------------------------
>  fs/btrfs/disk-io.h   |  1 +
>  fs/btrfs/extent_io.c | 67 +++++++++++++++++++++++++++-------------------------
>  3 files changed, 41 insertions(+), 90 deletions(-)
> 
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 7ccb6d839126..459491d662a0 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -755,33 +755,13 @@ static int check_node(struct btrfs_root *root, struct extent_buffer *node)
>  	return ret;
>  }
>  
> -static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
> -				      u64 phy_offset, struct page *page,
> -				      u64 start, u64 end, int mirror)
> +int btrfs_extent_buffer_end_read(struct extent_buffer *eb, int mirror)
>  {
> +	struct btrfs_fs_info *fs_info = eb->eb_info->fs_info;
> +	struct btrfs_root *root = fs_info->tree_root;
>  	u64 found_start;
>  	int found_level;
> -	struct extent_buffer *eb;
> -	struct btrfs_root *root;
> -	struct btrfs_fs_info *fs_info;
>  	int ret = 0;
> -	int reads_done;
> -
> -	if (!page->private)
> -		goto out;
> -
> -	eb = (struct extent_buffer *)page->private;
> -
> -	/* the pending IO might have been the only thing that kept this buffer
> -	 * in memory.  Make sure we have a ref for all this other checks
> -	 */
> -	extent_buffer_get(eb);
> -	fs_info = eb->eb_info->fs_info;
> -	root = fs_info->tree_root;
> -
> -	reads_done = atomic_dec_and_test(&eb->io_pages);
> -	if (!reads_done)
> -		goto err;
>  
>  	eb->read_mirror = mirror;
>  	if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags)) {
> @@ -833,45 +813,14 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
>  	if (!ret)
>  		set_extent_buffer_uptodate(eb);
>  err:
> -	if (reads_done &&
> -	    test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
> +	if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
>  		btree_readahead_hook(eb, ret);
>  
> -	if (ret) {
> -		/*
> -		 * our io error hook is going to dec the io pages
> -		 * again, we have to make sure it has something
> -		 * to decrement.
> -		 *
> -		 * TODO: Kill this, we've re-arranged how this works now so we
> -		 * don't need to do this io_pages dance.
> -		 */
> -		atomic_inc(&eb->io_pages);
> +	if (ret)
>  		clear_extent_buffer_uptodate(eb);
> -	}
> -	if (reads_done) {
> -		clear_bit(EXTENT_BUFFER_READING, &eb->bflags);
> -		smp_mb__after_atomic();
> -		wake_up_bit(&eb->bflags, EXTENT_BUFFER_READING);
> -	}
> -	free_extent_buffer(eb);
> -out:
>  	return ret;
>  }
>  
> -static int btree_io_failed_hook(struct page *page, int failed_mirror)
> -{
> -	struct extent_buffer *eb;
> -
> -	eb = (struct extent_buffer *)page->private;
> -	set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
> -	eb->read_mirror = failed_mirror;
> -	atomic_dec(&eb->io_pages);
> -	if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
> -		btree_readahead_hook(eb, -EIO);
> -	return -EIO;	/* we fixed nothing */
> -}
> -
>  static void end_workqueue_bio(struct bio *bio)
>  {
>  	struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
> @@ -4553,9 +4502,7 @@ static int btree_merge_bio_hook(struct page *page, unsigned long offset,
>  static const struct extent_io_ops btree_extent_io_ops = {
>  	/* mandatory callbacks */
>  	.submit_bio_hook = btree_submit_bio_hook,
> -	.readpage_end_io_hook = btree_readpage_end_io_hook,
>  	.merge_bio_hook = btree_merge_bio_hook,
> -	.readpage_io_failed_hook = btree_io_failed_hook,
>  	.set_range_writeback = btrfs_set_range_writeback,
>  	.tree_fs_info = btree_fs_info,
>  
> diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
> index 7f7c35d6347a..e1f4fef91547 100644
> --- a/fs/btrfs/disk-io.h
> +++ b/fs/btrfs/disk-io.h
> @@ -152,6 +152,7 @@ int btree_lock_page_hook(struct page *page, void *data,
>  int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
>  int __init btrfs_end_io_wq_init(void);
>  void btrfs_end_io_wq_exit(void);
> +int btrfs_extent_buffer_end_read(struct extent_buffer *eb, int mirror);
>  
>  #ifdef CONFIG_DEBUG_LOCK_ALLOC
>  void btrfs_init_lockdep(void);
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 2077bd6ad1b3..1e5affee0f7e 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -20,6 +20,7 @@
>  #include "locking.h"
>  #include "rcu-string.h"
>  #include "backref.h"
> +#include "disk-io.h"
>  
>  static struct kmem_cache *extent_state_cache;
>  static struct kmem_cache *extent_buffer_cache;
> @@ -5375,6 +5376,15 @@ int extent_buffer_uptodate(struct extent_buffer *eb)
>  	return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
>  }
>  
> +static void mark_eb_failed(struct extent_buffer *eb, int failed_mirror)
> +{
> +	set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
> +	eb->read_mirror = failed_mirror;
> +	atomic_dec(&eb->io_pages);
> +	if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
> +		btree_readahead_hook(eb, -EIO);
> +}
> +
>  static void end_bio_extent_buffer_readpage(struct bio *bio)
>  {
>  	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
> @@ -5383,12 +5393,13 @@ static void end_bio_extent_buffer_readpage(struct bio *bio)
>  	u64 unlock_start = 0, unlock_len = 0;
>  	int mirror_num = io_bio->mirror_num;
>  	int uptodate = !bio->bi_status;
> -	int i, ret;
> +	int i;
>  
>  	bio_for_each_segment_all(bvec, bio, i) {
>  		struct page *page = bvec->bv_page;
>  		struct btrfs_eb_info *eb_info;
>  		struct extent_buffer *eb;
> +		int reads_done;
>  
>  		eb = (struct extent_buffer *)page->private;
>  		if (WARN_ON(!eb))
> @@ -5397,41 +5408,33 @@ static void end_bio_extent_buffer_readpage(struct bio *bio)
>  		eb_info = eb->eb_info;
>  		if (!tree)
>  			tree = &eb_info->io_tree;
> +		extent_buffer_get(eb);
> +		reads_done = atomic_dec_and_test(&eb->io_pages);
>  		if (uptodate) {
> -			/*
> -			 * btree_readpage_end_io_hook doesn't care about
> -			 * start/end so just pass 0.  We'll kill this later.
> -			 */
> -			ret = tree->ops->readpage_end_io_hook(io_bio, 0,
> -							      page, 0, 0,
> -							      mirror_num);
> -			if (ret) {
> -				uptodate = 0;
> -			} else {
> -				u64 start = eb->start;
> -				int c, num_pages;
> -
> -				num_pages = num_extent_pages(eb->start,
> -							     eb->len);
> -				for (c = 0; c < num_pages; c++) {
> -					if (eb->pages[c] == page)
> -						break;
> -					start += PAGE_SIZE;
> -				}
> -				clean_io_failure(eb_info->fs_info,
> -						 &eb_info->io_failure_tree,
> -						 tree, start, page, 0, 0);
> +			u64 start = eb->start;
> +			int c, num_pages;
> +
> +			num_pages = num_extent_pages(eb->start,
> +						     eb->len);
> +			for (c = 0; c < num_pages; c++) {
> +				if (eb->pages[c] == page)
> +					break;
> +				start += PAGE_SIZE;
>  			}
> +			clean_io_failure(eb_info->fs_info,
> +					 &eb_info->io_failure_tree,
> +					 tree, start, page, 0, 0);

We don't fix failures here, so this clean_io_failure() call can be removed.

>  		}
> -		/*
> -		 * We never fix anything in btree_io_failed_hook.
> -		 *
> -		 * TODO: rework the io failed hook to not assume we can fix
> -		 * anything.
> -		 */
> +		if (reads_done && btrfs_extent_buffer_end_read(eb, mirror_num))
> +			uptodate = 0;
>  		if (!uptodate)
> -			tree->ops->readpage_io_failed_hook(page, mirror_num);
> -
> +			mark_eb_failed(eb, mirror_num);

Correct me if I'm wrong, but if !uptodate, ->io_pages would be dec'd
twice here (once by atomic_dec_and_test() above and again in
mark_eb_failed()), so the eb could be checked before all of its pages
have hit end_io.

Thanks,

-liubo
> +		if (reads_done) {
> +			clear_bit(EXTENT_BUFFER_READING, &eb->bflags);
> +			smp_mb__after_atomic();
> +			wake_up_bit(&eb->bflags, EXTENT_BUFFER_READING);
> +		}
> +		free_extent_buffer(eb);
>  		if (unlock_start == 0) {
>  			unlock_start = eb->start;
>  			unlock_len = PAGE_SIZE;
> -- 
> 2.7.5
> 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2017-11-17  1:25 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-11-14 21:56 [PATCH 01/10] remove mapping from balance_dirty_pages*() Josef Bacik
2017-11-14 21:56 ` [PATCH 02/10] writeback: convert WB_WRITTEN/WB_DIRITED counters to bytes Josef Bacik
2017-11-16 23:45   ` Liu Bo
2017-11-14 21:56 ` [PATCH 03/10] lib: add a batch size to fprop_global Josef Bacik
2017-11-22  8:47   ` Jan Kara
2017-11-22  8:54     ` Jan Kara
2017-11-14 21:56 ` [PATCH 04/10] lib: add a __fprop_add_percpu_max Josef Bacik
2017-11-14 21:56 ` [PATCH 05/10] writeback: convert the flexible prop stuff to bytes Josef Bacik
2017-11-14 21:56 ` [PATCH 06/10] writeback: add counters for metadata usage Josef Bacik
2017-11-22 10:21   ` Jan Kara
2017-11-14 21:56 ` [PATCH 07/10] writeback: introduce super_operations->write_metadata Josef Bacik
2017-11-14 21:56 ` [PATCH 08/10] export radix_tree_iter_tag_set Josef Bacik
2017-11-14 21:56 ` [PATCH 09/10] Btrfs: kill the btree_inode Josef Bacik
2017-11-17  1:03   ` Liu Bo
2017-11-17  1:13     ` Josef Bacik
2017-11-14 21:56 ` [PATCH 10/10] btrfs: rework end io for extent buffer reads Josef Bacik
2017-11-17  1:24   ` Liu Bo [this message]
2017-11-16 23:36 ` [PATCH 01/10] remove mapping from balance_dirty_pages*() Liu Bo
2017-11-21 22:45 ` Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171117012453.GG23614@dhcp-whq-twvpn-1-vpnpool-10-159-142-193.vpn.oracle.com \
    --to=bo.li.liu@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=hannes@cmpxchg.org \
    --cc=jack@suse.cz \
    --cc=jbacik@fb.com \
    --cc=josef@toxicpanda.com \
    --cc=kernel-team@fb.com \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).